[Mesa-dev] [PATCH v2 04/15] i965/fs: Add support for MOV_INDIRECT on pre-Broadwell hardware

Matt Turner mattst88 at gmail.com
Thu Apr 7 23:12:30 UTC 2016


On Tue, Mar 22, 2016 at 3:33 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> While we're at it, we also add support for the possibility that the
> indirect is, in fact, a constant.  This shouldn't happen in the common case
> (if it does, that means NIR failed to constant-fold something), but it's
> possible so we should handle it.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp           |  4 ++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 67 +++++++++++++++++++++-----
>  2 files changed, 58 insertions(+), 13 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index d41c8a8..91487c9 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -4481,6 +4481,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
>     case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
>        return 8;
>
> +   case SHADER_OPCODE_MOV_INDIRECT:
> +      /* Prior to Broadwell, we only have 8 address subregisters */
> +      return devinfo->gen < 8 ? 8 : inst->exec_size;
> +
>     default:
>        return inst->exec_size;
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 35400cb..4e89a8a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -351,22 +351,63 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
>
>     unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
>
> -   /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
> -   struct brw_reg addr = vec8(brw_address_reg(0));
> +   if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
> +      imm_byte_offset += indirect_byte_offset.ud;
>
> -   /* The destination stride of an instruction (in bytes) must be greater
> -    * than or equal to the size of the rest of the instruction.  Since the
> -    * address register is of type UW, we can't use a D-type instruction.
> -    * In order to get around this, re re-type to UW and use a stride.
> -    */
> -   indirect_byte_offset =
> -      retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
> +      reg.nr = imm_byte_offset / REG_SIZE;
> +      reg.subnr = imm_byte_offset % REG_SIZE;
> +      brw_MOV(p, dst, reg);
> +   } else {
> +      /* Prior to Broadwell, there are only 8 address registers. */
> +      assert(inst->exec_size == 8 || devinfo->gen >= 8);
> +
> +      /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
> +      struct brw_reg addr = vec8(brw_address_reg(0));
> +
> +      /* The destination stride of an instruction (in bytes) must be greater
> +       * than or equal to the size of the rest of the instruction.  Since the
> +       * address register is of type UW, we can't use a D-type instruction.
> +       * In order to get around this, re re-type to UW and use a stride.
> +       */
> +      indirect_byte_offset =
> +         retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
> +
> +      struct brw_reg ind_src;
> +      if (devinfo->gen < 8) {
> +         /* Prior to broadwell, we have a restriction that the bottom 5 bits
> +          * of the base offset and the bottom 5 bits of the indirect must add
> +          * to less than 32.  In other words, the hardware needs to be able to
> +          * add the bottom five bits of the two to get the subnumber and add

This needs to include (or simply be) a citation. I'm not familiar with
restriction, and in a few minutes of searching the IVB PRM I didn't
see anything that seems to match it.

> +          * the next 7 bits of each to get the actual register number.  Since
> +          * the indirect may cause us to cross a register boundary, this makes
> +          * it almost useless.  We could try and do something clever where we
> +          * use a actual base offset if base_offset % 32 == 0 but that would
> +          * mean we were generating different code depending on the base
> +          * offset.  Instead, for the sake of consistency, we'll just do the
> +          * add ourselves.
> +          */
> +         brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
> +         ind_src = brw_VxH_indirect(0, 0);
> +      } else {
> +         brw_MOV(p, addr, indirect_byte_offset);
> +         ind_src = brw_VxH_indirect(0, imm_byte_offset);
> +      }
>
> -   /* Prior to Broadwell, there are only 8 address registers. */
> -   assert(inst->exec_size == 8 || devinfo->gen >= 8);
> +      brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
>
> -   brw_MOV(p, addr, indirect_byte_offset);
> -   brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
> +      if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
> +          !inst->get_next()->is_head_sentinel() &&
> +          ((fs_inst *)inst->get_next())->mlen > 0) {

Newline here, before a multiline comment.

> +         /* From the Sandybridge PRM:
> +          *
> +          *    "[Errata: DevSNB(SNB)] If MRF register is updated by any
> +          *    instruction that “indexed/indirect” source AND is followed by a
> +          *    send, the instruction requires a “Switch”. This is to avoid
> +          *    race condition where send may dispatch before MRF is updated."
> +          */
> +         brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
> +      }
> +   }
>  }
>
>  void
> --
> 2.5.0.400.gff86faf
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list