[Mesa-dev] [PATCH v2 04/15] i965/fs: Add support for MOV_INDIRECT on pre-Broadwell hardware
Kenneth Graunke
kenneth at whitecape.org
Thu Apr 14 21:32:29 UTC 2016
On Tuesday, March 22, 2016 3:33:39 PM PDT Jason Ekstrand wrote:
> While we're at it, we also add support for the possibility that the
> indirect is, in fact, a constant. This shouldn't happen in the common case
> (if it does, that means NIR failed to constant-fold something), but it's
> possible so we should handle it.
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++
> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 67 ++++++++++++++++++++
+-----
> 2 files changed, 58 insertions(+), 13 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/
i965/brw_fs.cpp
> index d41c8a8..91487c9 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -4481,6 +4481,10 @@ get_lowered_simd_width(const struct brw_device_info
*devinfo,
> case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
> return 8;
>
> + case SHADER_OPCODE_MOV_INDIRECT:
> + /* Prior to Broadwell, we only have 8 address subregisters */
> + return devinfo->gen < 8 ? 8 : inst->exec_size;
> +
> default:
> return inst->exec_size;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/
drivers/dri/i965/brw_fs_generator.cpp
> index 35400cb..4e89a8a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -351,22 +351,63 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
>
> unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
>
> - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
> - struct brw_reg addr = vec8(brw_address_reg(0));
> + if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
> + imm_byte_offset += indirect_byte_offset.ud;
>
> - /* The destination stride of an instruction (in bytes) must be greater
> - * than or equal to the size of the rest of the instruction. Since the
> - * address register is of type UW, we can't use a D-type instruction.
> - * In order to get around this, re re-type to UW and use a stride.
> - */
> - indirect_byte_offset =
> - retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
> + reg.nr = imm_byte_offset / REG_SIZE;
> + reg.subnr = imm_byte_offset % REG_SIZE;
> + brw_MOV(p, dst, reg);
> + } else {
> + /* Prior to Broadwell, there are only 8 address registers. */
> + assert(inst->exec_size == 8 || devinfo->gen >= 8);
> +
> + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
> + struct brw_reg addr = vec8(brw_address_reg(0));
> +
> + /* The destination stride of an instruction (in bytes) must be
greater
> + * than or equal to the size of the rest of the instruction. Since
the
> + * address register is of type UW, we can't use a D-type instruction.
> + * In order to get around this, re re-type to UW and use a stride.
> + */
> + indirect_byte_offset =
> + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
> +
> + struct brw_reg ind_src;
> + if (devinfo->gen < 8) {
> + /* Prior to broadwell, we have a restriction that the bottom 5
bits
> + * of the base offset and the bottom 5 bits of the indirect must
add
> + * to less than 32. In other words, the hardware needs to be able
to
> + * add the bottom five bits of the two to get the subnumber and
add
> + * the next 7 bits of each to get the actual register number.
Since
> + * the indirect may cause us to cross a register boundary, this
makes
> + * it almost useless. We could try and do something clever where
we
> + * use a actual base offset if base_offset % 32 == 0 but that
would
> + * mean we were generating different code depending on the base
> + * offset. Instead, for the sake of consistency, we'll just do
the
> + * add ourselves.
> + */
> + brw_ADD(p, addr, indirect_byte_offset,
brw_imm_uw(imm_byte_offset));
> + ind_src = brw_VxH_indirect(0, 0);
> + } else {
> + brw_MOV(p, addr, indirect_byte_offset);
> + ind_src = brw_VxH_indirect(0, imm_byte_offset);
> + }
>
> - /* Prior to Broadwell, there are only 8 address registers. */
> - assert(inst->exec_size == 8 || devinfo->gen >= 8);
> + brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
>
> - brw_MOV(p, addr, indirect_byte_offset);
> - brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
> + if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
> + !inst->get_next()->is_head_sentinel() &&
> + ((fs_inst *)inst->get_next())->mlen > 0) {
Don't you mean:
if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
!inst->get_prev()->is_head_sentinel() &&
((fs_inst *) inst->get_prev())->mlen > 0) {
Seems like you're walking forward while facing backward...
> + /* From the Sandybridge PRM:
> + *
> + * "[Errata: DevSNB(SNB)] If MRF register is updated by any
> + * instruction that “indexed/indirect” source AND is followed
by a
> + * send, the instruction requires a “Switch”. This is to avoid
> + * race condition where send may dispatch before MRF is
updated."
> + */
> + brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
> + }
> + }
> }
>
> void
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160414/43b682f4/attachment-0001.sig>
More information about the mesa-dev
mailing list