[Mesa-dev] [PATCH 1/5] intel/fs: Exclude control sources from execution type and region alignment calculations.
Francisco Jerez
currojerez at riseup.net
Fri Feb 15 03:33:17 UTC 2019
Jason Ekstrand <jason at jlekstrand.net> writes:
> On Fri, Jan 18, 2019 at 6:09 PM Francisco Jerez <currojerez at riseup.net>
> wrote:
>
>> Currently the execution type calculation will return a bogus value in
>> cases like:
>>
>> mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u
>>
>> Which will be considered to have a 32-bit integer execution type even
>> though the actual indirect move operation will be carried out with
>> 16-bit precision.
>>
>> Similarly there's no need to apply the CHV/BXT double-precision region
>> alignment restrictions to such control sources, since they aren't
>> directly involved in the double-precision arithmetic operations
>> emitted by these virtual instructions. Applying the CHV/BXT
>> restrictions to control sources was expected to be harmless if mildly
>> inefficient, but unfortunately it exposed problems at codegen level
>> for virtual instructions (namely the SHUFFLE instruction used for the
>> Vulkan 1.1 subgroup feature) that weren't prepared to accept control
>> sources with an arbitrary strided region.
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109328
>> Reported-by: Mark Janes <mark.a.janes at intel.com>
>> Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass."
>> ---
>> src/intel/compiler/brw_fs.cpp | 54 +++++++++++++++++++
>> src/intel/compiler/brw_fs_lower_regioning.cpp | 6 +--
>> src/intel/compiler/brw_ir_fs.h | 10 +++-
>> 3 files changed, 66 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
>> index 0359eb079f7..f475b617df2 100644
>> --- a/src/intel/compiler/brw_fs.cpp
>> +++ b/src/intel/compiler/brw_fs.cpp
>> @@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const
>> }
>> }
>>
>> +bool
>> +fs_inst::is_control_source(unsigned arg) const
>> +{
>> + switch (opcode) {
>> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
>> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
>> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
>> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
>> + return arg == 0;
>> +
>> + case SHADER_OPCODE_BROADCAST:
>> + case SHADER_OPCODE_SHUFFLE:
>> + case SHADER_OPCODE_QUAD_SWIZZLE:
>> + case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
>> + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
>> + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
>> + case SHADER_OPCODE_IMAGE_SIZE:
>> + case SHADER_OPCODE_GET_BUFFER_SIZE:
>> + return arg == 1;
>> +
>> + case SHADER_OPCODE_MOV_INDIRECT:
>> + case SHADER_OPCODE_CLUSTER_BROADCAST:
>> + case SHADER_OPCODE_TEX:
>> + case FS_OPCODE_TXB:
>> + case SHADER_OPCODE_TXD:
>> + case SHADER_OPCODE_TXF:
>> + case SHADER_OPCODE_TXF_LZ:
>> + case SHADER_OPCODE_TXF_CMS:
>> + case SHADER_OPCODE_TXF_CMS_W:
>> + case SHADER_OPCODE_TXF_UMS:
>> + case SHADER_OPCODE_TXF_MCS:
>> + case SHADER_OPCODE_TXL:
>> + case SHADER_OPCODE_TXL_LZ:
>> + case SHADER_OPCODE_TXS:
>> + case SHADER_OPCODE_LOD:
>> + case SHADER_OPCODE_TG4:
>> + case SHADER_OPCODE_TG4_OFFSET:
>> + case SHADER_OPCODE_SAMPLEINFO:
>> + case SHADER_OPCODE_UNTYPED_ATOMIC:
>> + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
>> + case SHADER_OPCODE_UNTYPED_SURFACE_READ:
>> + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
>> + case SHADER_OPCODE_BYTE_SCATTERED_READ:
>> + case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
>> + case SHADER_OPCODE_TYPED_ATOMIC:
>> + case SHADER_OPCODE_TYPED_SURFACE_READ:
>> + case SHADER_OPCODE_TYPED_SURFACE_WRITE:
>>
>
> As of b284d222db, we are no longer using many of the opcodes in this list
> (gen7 pull constant loads, [un]typed surface reads/writes, etc.) It will
> need to be rebased and we need to add SHADER_OPCODE_SEND to the list.
> Fortunately, the changes to add SHADER_OPCODE_SEND landed before the 19.0
> cut-off so there is no need to make two versions for backporting.
>
Yes, that's roughly what I had done during one of my previous rebases of
this series, see:
https://cgit.freedesktop.org/~currojerez/mesa/commit/?h=jenkins&id=30f8f3ff48b02ead688705e0679a98c0d6c9c87e
> Other than that, this patch seems perfectly reasonable to me
>
> Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
>
> If you want me to hand-review the new list of opcodes, feel free to send a
> v2 and cc me.
>
>
>> + return arg == 1 || arg == 2;
>> +
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> /**
>> * Returns true if this instruction's sources and destinations cannot
>> * safely be the same register.
>> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp
>> b/src/intel/compiler/brw_fs_lower_regioning.cpp
>> index df50993dee6..6a3c23892b4 100644
>> --- a/src/intel/compiler/brw_fs_lower_regioning.cpp
>> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
>> @@ -74,7 +74,7 @@ namespace {
>> unsigned stride = inst->dst.stride * type_sz(inst->dst.type);
>>
>> for (unsigned i = 0; i < inst->sources; i++) {
>> - if (!is_uniform(inst->src[i]))
>> + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))
>> stride = MAX2(stride, inst->src[i].stride *
>> type_sz(inst->src[i].type));
>> }
>> @@ -92,7 +92,7 @@ namespace {
>> required_dst_byte_offset(const fs_inst *inst)
>> {
>> for (unsigned i = 0; i < inst->sources; i++) {
>> - if (!is_uniform(inst->src[i]))
>> + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))
>> if (reg_offset(inst->src[i]) % REG_SIZE !=
>> reg_offset(inst->dst) % REG_SIZE)
>> return 0;
>> @@ -109,7 +109,7 @@ namespace {
>> has_invalid_src_region(const gen_device_info *devinfo, const fs_inst
>> *inst,
>> unsigned i)
>> {
>> - if (is_unordered(inst)) {
>> + if (is_unordered(inst) || inst->is_control_source(i)) {
>> return false;
>> } else {
>> const unsigned dst_byte_stride = inst->dst.stride *
>> type_sz(inst->dst.type);
>> diff --git a/src/intel/compiler/brw_ir_fs.h
>> b/src/intel/compiler/brw_ir_fs.h
>> index 08e3d83d910..0a0ba1d363a 100644
>> --- a/src/intel/compiler/brw_ir_fs.h
>> +++ b/src/intel/compiler/brw_ir_fs.h
>> @@ -358,6 +358,13 @@ public:
>> bool can_change_types() const;
>> bool has_source_and_destination_hazard() const;
>>
>> + /**
>> + * Return whether \p arg is a control source of a virtual instruction
>> which
>> + * shouldn't contribute to the execution type and usual regioning
>> + * restriction calculations of arithmetic instructions.
>> + */
>> + bool is_control_source(unsigned arg) const;
>> +
>> /**
>> * Return the subset of flag registers read by the instruction as a
>> bitset
>> * with byte granularity.
>> @@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst)
>> brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
>>
>> for (int i = 0; i < inst->sources; i++) {
>> - if (inst->src[i].file != BAD_FILE) {
>> + if (inst->src[i].file != BAD_FILE &&
>> + !inst->is_control_source(i)) {
>> const brw_reg_type t = get_exec_type(inst->src[i].type);
>> if (type_sz(t) > type_sz(exec_type))
>> exec_type = t;
>> --
>> 2.19.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190214/c58a5f11/attachment-0001.sig>
More information about the mesa-dev
mailing list