[Mesa-stable] [PATCH] intel/fs: Set up sampler message headers in the visitor on gen7+
Francisco Jerez
currojerez at riseup.net
Thu Mar 1 20:38:10 UTC 2018
Jason Ekstrand <jason at jlekstrand.net> writes:
> This gives the scheduler visibility into the headers which should
> improve scheduling. More importantly, however, it lets the scheduler
> know that the header gets written. As-is, the scheduler thinks that a
> texture instruction only reads it's payload and is unaware that it may
> write to the first register so it may reorder it with respect to a read
> from that register. This is causing issues in a couple of Dota 2 vertex
> shaders.
>
Yikes... Corrupting your GRF since 2012... Render target writes
probably need a similar treatment.
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104923
> Cc: mesa-stable at lists.freedesktop.org
Reviewed-by: Francisco Jerez <currojerez at riseup.net>
> ---
> src/intel/compiler/brw_fs.cpp | 40 +++++++++++++++++++++++++++++----
> src/intel/compiler/brw_fs_generator.cpp | 21 +++--------------
> 2 files changed, 39 insertions(+), 22 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 113f62c..ab8cc89 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -4192,17 +4192,15 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
> op == SHADER_OPCODE_SAMPLEINFO ||
> is_high_sampler(devinfo, sampler)) {
> /* For general texture offsets (no txf workaround), we need a header to
> - * put them in. Note that we're only reserving space for it in the
> - * message payload as it will be initialized implicitly by the
> - * generator.
> + * put them in.
> *
> * TG4 needs to place its channel select in the header, for interaction
> * with ARB_texture_swizzle. The sampler index is only 4-bits, so for
> * larger sampler numbers we need to offset the Sampler State Pointer in
> * the header.
> */
> + fs_reg header = retype(sources[0], BRW_REGISTER_TYPE_UD);
> header_size = 1;
> - sources[0] = fs_reg();
> length++;
>
> /* If we're requesting fewer than four channels worth of response,
> @@ -4214,6 +4212,40 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
> unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
> inst->offset |= mask << 12;
> }
> +
> + /* Build the actual header */
> + const fs_builder ubld = bld.exec_all().group(8, 0);
> + const fs_builder ubld1 = ubld.group(1, 0);
> + ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
> + if (inst->offset) {
> + ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
> + } else if (bld.shader->stage != MESA_SHADER_VERTEX &&
> + bld.shader->stage != MESA_SHADER_FRAGMENT) {
> + /* The vertex and fragment stages have g0.2 set to 0, so
> + * header0.2 is 0 when g0 is copied. Other stages may not, so we
> + * must set it to 0 to avoid setting undesirable bits in the
> + * message.
> + */
> + ubld1.MOV(component(header, 2), brw_imm_ud(0));
> + }
> +
> + if (is_high_sampler(devinfo, sampler)) {
> + if (sampler.file == BRW_IMMEDIATE_VALUE) {
> + assert(sampler.ud >= 16);
> + const int sampler_state_size = 16; /* 16 bytes */
> +
> + ubld1.ADD(component(header, 3),
> + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
> + brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size));
> + } else {
> + fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
> + ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
> + ubld1.SHL(tmp, tmp, brw_imm_ud(4));
> + ubld1.ADD(component(header, 3),
> + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
> + tmp);
> + }
> + }
> }
>
> if (shadow_c.file != BAD_FILE) {
> diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
> index b59c09f..a5a821a 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1001,19 +1001,13 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
> * we need to set it up explicitly and load the offset bitfield.
> * Otherwise, we can use an implied move from g0 to the first message reg.
> */
> - if (inst->header_size != 0) {
> + if (inst->header_size != 0 && devinfo->gen < 7) {
> if (devinfo->gen < 6 && !inst->offset) {
> /* Set up an implied move from g0 to the MRF. */
> src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
> } else {
> - struct brw_reg header_reg;
> -
> - if (devinfo->gen >= 7) {
> - header_reg = src;
> - } else {
> - assert(inst->base_mrf != -1);
> - header_reg = brw_message_reg(inst->base_mrf);
> - }
> + assert(inst->base_mrf != -1);
> + struct brw_reg header_reg = brw_message_reg(inst->base_mrf);
>
> brw_push_insn_state(p);
> brw_set_default_exec_size(p, BRW_EXECUTE_8);
> @@ -1027,17 +1021,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
> /* Set the offset bits in DWord 2. */
> brw_MOV(p, get_element_ud(header_reg, 2),
> brw_imm_ud(inst->offset));
> - } else if (stage != MESA_SHADER_VERTEX &&
> - stage != MESA_SHADER_FRAGMENT) {
> - /* The vertex and fragment stages have g0.2 set to 0, so
> - * header0.2 is 0 when g0 is copied. Other stages may not, so we
> - * must set it to 0 to avoid setting undesirable bits in the
> - * message.
> - */
> - brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0));
> }
>
> - brw_adjust_sampler_state_pointer(p, header_reg, sampler_index);
> brw_pop_insn_state(p);
> }
> }
> --
> 2.5.0.400.gff86faf
>
> _______________________________________________
> mesa-stable mailing list
> mesa-stable at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20180301/224dc1a5/attachment.sig>
More information about the mesa-stable
mailing list