[Mesa-dev] [PATCH 1/5] intel/fs: Exclude control sources from execution type and region alignment calculations.

Anuj Phogat anuj.phogat at gmail.com
Thu Feb 14 17:09:27 UTC 2019


Fixes all subgroup test failures in vulkancts on Icelake.
Series is:
Tested-by: Anuj Phogat <anuj.phogat at gmail.com>
On Fri, Jan 18, 2019 at 4:09 PM Francisco Jerez <currojerez at riseup.net> wrote:
>
> Currently the execution type calculation will return a bogus value in
> cases like:
>
>   mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u
>
> Which will be considered to have a 32-bit integer execution type even
> though the actual indirect move operation will be carried out with
> 16-bit precision.
>
> Similarly there's no need to apply the CHV/BXT double-precision region
> alignment restrictions to such control sources, since they aren't
> directly involved in the double-precision arithmetic operations
> emitted by these virtual instructions.  Applying the CHV/BXT
> restrictions to control sources was expected to be harmless if mildly
> inefficient, but unfortunately it exposed problems at codegen level
> for virtual instructions (namely the SHUFFLE instruction used for the
> Vulkan 1.1 subgroup feature) that weren't prepared to accept control
> sources with an arbitrary strided region.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109328
> Reported-by: Mark Janes <mark.a.janes at intel.com>
> Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass."
> ---
>  src/intel/compiler/brw_fs.cpp                 | 54 +++++++++++++++++++
>  src/intel/compiler/brw_fs_lower_regioning.cpp |  6 +--
>  src/intel/compiler/brw_ir_fs.h                | 10 +++-
>  3 files changed, 66 insertions(+), 4 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 0359eb079f7..f475b617df2 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const
>     }
>  }
>
> +bool
> +fs_inst::is_control_source(unsigned arg) const
> +{
> +   switch (opcode) {
> +   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
> +   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
> +   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
> +   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 is now removed from
master. This series need a rebase.
> +      return arg == 0;
> +
> +   case SHADER_OPCODE_BROADCAST:
> +   case SHADER_OPCODE_SHUFFLE:
> +   case SHADER_OPCODE_QUAD_SWIZZLE:
> +   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
> +   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
> +   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
> +   case SHADER_OPCODE_IMAGE_SIZE:
> +   case SHADER_OPCODE_GET_BUFFER_SIZE:
> +      return arg == 1;
> +
> +   case SHADER_OPCODE_MOV_INDIRECT:
> +   case SHADER_OPCODE_CLUSTER_BROADCAST:
> +   case SHADER_OPCODE_TEX:
> +   case FS_OPCODE_TXB:
> +   case SHADER_OPCODE_TXD:
> +   case SHADER_OPCODE_TXF:
> +   case SHADER_OPCODE_TXF_LZ:
> +   case SHADER_OPCODE_TXF_CMS:
> +   case SHADER_OPCODE_TXF_CMS_W:
> +   case SHADER_OPCODE_TXF_UMS:
> +   case SHADER_OPCODE_TXF_MCS:
> +   case SHADER_OPCODE_TXL:
> +   case SHADER_OPCODE_TXL_LZ:
> +   case SHADER_OPCODE_TXS:
> +   case SHADER_OPCODE_LOD:
> +   case SHADER_OPCODE_TG4:
> +   case SHADER_OPCODE_TG4_OFFSET:
> +   case SHADER_OPCODE_SAMPLEINFO:
> +   case SHADER_OPCODE_UNTYPED_ATOMIC:
> +   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
> +   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
> +   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
> +   case SHADER_OPCODE_BYTE_SCATTERED_READ:
> +   case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
> +   case SHADER_OPCODE_TYPED_ATOMIC:
> +   case SHADER_OPCODE_TYPED_SURFACE_READ:
> +   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
> +      return arg == 1 || arg == 2;
> +
> +   default:
> +      return false;
> +   }
> +}
> +
>  /**
>   * Returns true if this instruction's sources and destinations cannot
>   * safely be the same register.
> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp
> index df50993dee6..6a3c23892b4 100644
> --- a/src/intel/compiler/brw_fs_lower_regioning.cpp
> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp
> @@ -74,7 +74,7 @@ namespace {
>           unsigned stride = inst->dst.stride * type_sz(inst->dst.type);
>
>           for (unsigned i = 0; i < inst->sources; i++) {
> -            if (!is_uniform(inst->src[i]))
> +            if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))
>                 stride = MAX2(stride, inst->src[i].stride *
>                               type_sz(inst->src[i].type));
>           }
> @@ -92,7 +92,7 @@ namespace {
>     required_dst_byte_offset(const fs_inst *inst)
>     {
>        for (unsigned i = 0; i < inst->sources; i++) {
> -         if (!is_uniform(inst->src[i]))
> +         if (!is_uniform(inst->src[i]) && !inst->is_control_source(i))
>              if (reg_offset(inst->src[i]) % REG_SIZE !=
>                  reg_offset(inst->dst) % REG_SIZE)
>                 return 0;
> @@ -109,7 +109,7 @@ namespace {
>     has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst,
>                            unsigned i)
>     {
> -      if (is_unordered(inst)) {
> +      if (is_unordered(inst) || inst->is_control_source(i)) {
>           return false;
>        } else {
>           const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type);
> diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
> index 08e3d83d910..0a0ba1d363a 100644
> --- a/src/intel/compiler/brw_ir_fs.h
> +++ b/src/intel/compiler/brw_ir_fs.h
> @@ -358,6 +358,13 @@ public:
>     bool can_change_types() const;
>     bool has_source_and_destination_hazard() const;
>
> +   /**
> +    * Return whether \p arg is a control source of a virtual instruction which
> +    * shouldn't contribute to the execution type and usual regioning
> +    * restriction calculations of arithmetic instructions.
> +    */
> +   bool is_control_source(unsigned arg) const;
> +
>     /**
>      * Return the subset of flag registers read by the instruction as a bitset
>      * with byte granularity.
> @@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst)
>     brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
>
>     for (int i = 0; i < inst->sources; i++) {
> -      if (inst->src[i].file != BAD_FILE) {
> +      if (inst->src[i].file != BAD_FILE &&
> +          !inst->is_control_source(i)) {
>           const brw_reg_type t = get_exec_type(inst->src[i].type);
>           if (type_sz(t) > type_sz(exec_type))
>              exec_type = t;
> --
> 2.19.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list