[Mesa-dev] [PATCH 1/5] intel/fs: Exclude control sources from execution type and region alignment calculations.
Jason Ekstrand
jason at jlekstrand.net
Thu Feb 14 21:12:23 UTC 2019
On Fri, Jan 18, 2019 at 6:09 PM Francisco Jerez <currojerez at riseup.net>
wrote:
> Currently the execution type calculation will return a bogus value in
> cases like:
>
> mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u
>
> Which will be considered to have a 32-bit integer execution type even
> though the actual indirect move operation will be carried out with
> 16-bit precision.
>
> Similarly there's no need to apply the CHV/BXT double-precision region
> alignment restrictions to such control sources, since they aren't
> directly involved in the double-precision arithmetic operations
> emitted by these virtual instructions. Applying the CHV/BXT
> restrictions to control sources was expected to be harmless if mildly
> inefficient, but unfortunately it exposed problems at codegen level
> for virtual instructions (namely the SHUFFLE instruction used for the
> Vulkan 1.1 subgroup feature) that weren't prepared to accept control
> sources with an arbitrary strided region.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109328
> Reported-by: Mark Janes <mark.a.janes at intel.com>
> Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass."
> ---
> src/intel/compiler/brw_fs.cpp | 54 +++++++++++++++++++
> src/intel/compiler/brw_fs_lower_regioning.cpp | 6 +--
> src/intel/compiler/brw_ir_fs.h | 10 +++-
> 3 files changed, 66 insertions(+), 4 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 0359eb079f7..f475b617df2 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const
> }
> }
>
> +bool
> +fs_inst::is_control_source(unsigned arg) const
> +{
> + switch (opcode) {
> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
> + return arg == 0;
> +
> + case SHADER_OPCODE_BROADCAST:
> + case SHADER_OPCODE_SHUFFLE:
> + case SHADER_OPCODE_QUAD_SWIZZLE:
> + case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
> + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
> + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
> + case SHADER_OPCODE_IMAGE_SIZE:
> + case SHADER_OPCODE_GET_BUFFER_SIZE:
> + return arg == 1;
> +
> + case SHADER_OPCODE_MOV_INDIRECT:
> + case SHADER_OPCODE_CLUSTER_BROADCAST:
> + case SHADER_OPCODE_TEX:
> + case FS_OPCODE_TXB:
> + case SHADER_OPCODE_TXD:
> + case SHADER_OPCODE_TXF:
> + case SHADER_OPCODE_TXF_LZ:
> + case SHADER_OPCODE_TXF_CMS:
> + case SHADER_OPCODE_TXF_CMS_W:
> + case SHADER_OPCODE_TXF_UMS:
> + case SHADER_OPCODE_TXF_MCS:
> + case SHADER_OPCODE_TXL:
> + case SHADER_OPCODE_TXL_LZ:
> + case SHADER_OPCODE_TXS:
> + case SHADER_OPCODE_LOD:
> + case SHADER_OPCODE_TG4:
> + case SHADER_OPCODE_TG4_OFFSET:
> + case SHADER_OPCODE_SAMPLEINFO:
> + case SHADER_OPCODE_UNTYPED_ATOMIC:
> + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
> + case SHADER_OPCODE_UNTYPED_SURFACE_READ:
> + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
> + case SHADER_OPCODE_BYTE_SCATTERED_READ:
> + case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
> + case SHADER_OPCODE_TYPED_ATOMIC:
> + case SHADER_OPCODE_TYPED_SURFACE_READ:
> + case SHADER_OPCODE_TYPED_SURFACE_WRITE:
>
As of b284d222db, we are no longer using many of the opcodes in this list
(gen7 pull constant loads, [un]typed surface reads/writes, etc.) It will
need to be rebased and we need to add SHADER_OPCODE_SEND to the list.
Fortunately, the changes to add SHADER_OPCODE_SEND landed before the 19.0
cut-off so there is no need to make two versions for backporting.
Other than that, this patch seems perfectly reasonable to me
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
If you want me to hand-review the new list of opcodes, feel free to send a
v2 and cc me.
> + return arg == 1 || arg == 2;
> +
> + default:
> + return false;
> + }
> +}
> +
> /**
> * Returns true if this instruction's sources and destinations cannot
> * safely be the same register.
> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp
> b/src/intel/compiler/brw_fs_lower_regioning.cpp
> index df50993dee6..6a3c23892b4 100644
> --- a/src/intel/compiler/brw_fs_lower_regioning.cpp
> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
> @@ -74,7 +74,7 @@ namespace {
> unsigned stride = inst->dst.stride * type_sz(inst->dst.type);
>
> for (unsigned i = 0; i < inst->sources; i++) {
> - if (!is_uniform(inst->src[i]))
> + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))
> stride = MAX2(stride, inst->src[i].stride *
> type_sz(inst->src[i].type));
> }
> @@ -92,7 +92,7 @@ namespace {
> required_dst_byte_offset(const fs_inst *inst)
> {
> for (unsigned i = 0; i < inst->sources; i++) {
> - if (!is_uniform(inst->src[i]))
> + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))
> if (reg_offset(inst->src[i]) % REG_SIZE !=
> reg_offset(inst->dst) % REG_SIZE)
> return 0;
> @@ -109,7 +109,7 @@ namespace {
> has_invalid_src_region(const gen_device_info *devinfo, const fs_inst
> *inst,
> unsigned i)
> {
> - if (is_unordered(inst)) {
> + if (is_unordered(inst) || inst->is_control_source(i)) {
> return false;
> } else {
> const unsigned dst_byte_stride = inst->dst.stride *
> type_sz(inst->dst.type);
> diff --git a/src/intel/compiler/brw_ir_fs.h
> b/src/intel/compiler/brw_ir_fs.h
> index 08e3d83d910..0a0ba1d363a 100644
> --- a/src/intel/compiler/brw_ir_fs.h
> +++ b/src/intel/compiler/brw_ir_fs.h
> @@ -358,6 +358,13 @@ public:
> bool can_change_types() const;
> bool has_source_and_destination_hazard() const;
>
> + /**
> + * Return whether \p arg is a control source of a virtual instruction
> which
> + * shouldn't contribute to the execution type and usual regioning
> + * restriction calculations of arithmetic instructions.
> + */
> + bool is_control_source(unsigned arg) const;
> +
> /**
> * Return the subset of flag registers read by the instruction as a
> bitset
> * with byte granularity.
> @@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst)
> brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
>
> for (int i = 0; i < inst->sources; i++) {
> - if (inst->src[i].file != BAD_FILE) {
> + if (inst->src[i].file != BAD_FILE &&
> + !inst->is_control_source(i)) {
> const brw_reg_type t = get_exec_type(inst->src[i].type);
> if (type_sz(t) > type_sz(exec_type))
> exec_type = t;
> --
> 2.19.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190214/916c880a/attachment-0001.html>
More information about the mesa-dev
mailing list