[Mesa-stable] [PATCH] intel/fs: Set up sampler message headers in the visitor on gen7+

Jason Ekstrand jason at jlekstrand.net
Thu Mar 1 23:05:16 UTC 2018


On Thu, Mar 1, 2018 at 12:38 PM, Francisco Jerez <currojerez at riseup.net>
wrote:

> Jason Ekstrand <jason at jlekstrand.net> writes:
>
> > This gives the scheduler visibility into the headers which should
> > improve scheduling.  More importantly, however, it lets the scheduler
> > know that the header gets written.  As-is, the scheduler thinks that a
> > texture instruction only reads it's payload and is unaware that it may
> > write to the first register so it may reorder it with respect to a read
> > from that register.  This is causing issues in a couple of Dota 2 vertex
> > shaders.
> >
>
> Yikes...  Corrupting your GRF since 2012...  Render target writes
> probably need a similar treatment.
>

Yeah... It all needs a similar treatment. :-)


> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104923
> > Cc: mesa-stable at lists.freedesktop.org
>
> Reviewed-by: Francisco Jerez <currojerez at riseup.net>
>

Thanks!


> > ---
> >  src/intel/compiler/brw_fs.cpp           | 40
> +++++++++++++++++++++++++++++----
> >  src/intel/compiler/brw_fs_generator.cpp | 21 +++--------------
> >  2 files changed, 39 insertions(+), 22 deletions(-)
> >
> > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.
> cpp
> > index 113f62c..ab8cc89 100644
> > --- a/src/intel/compiler/brw_fs.cpp
> > +++ b/src/intel/compiler/brw_fs.cpp
> > @@ -4192,17 +4192,15 @@ lower_sampler_logical_send_gen7(const
> fs_builder &bld, fs_inst *inst, opcode op,
> >         op == SHADER_OPCODE_SAMPLEINFO ||
> >         is_high_sampler(devinfo, sampler)) {
> >        /* For general texture offsets (no txf workaround), we need a
> header to
> > -       * put them in.  Note that we're only reserving space for it in
> the
> > -       * message payload as it will be initialized implicitly by the
> > -       * generator.
> > +       * put them in.
> >         *
> >         * TG4 needs to place its channel select in the header, for
> interaction
> >         * with ARB_texture_swizzle.  The sampler index is only 4-bits,
> so for
> >         * larger sampler numbers we need to offset the Sampler State
> Pointer in
> >         * the header.
> >         */
> > +      fs_reg header = retype(sources[0], BRW_REGISTER_TYPE_UD);
> >        header_size = 1;
> > -      sources[0] = fs_reg();
> >        length++;
> >
> >        /* If we're requesting fewer than four channels worth of response,
> > @@ -4214,6 +4212,40 @@ lower_sampler_logical_send_gen7(const fs_builder
> &bld, fs_inst *inst, opcode op,
> >           unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1)
> & 0xf;
> >           inst->offset |= mask << 12;
> >        }
> > +
> > +      /* Build the actual header */
> > +      const fs_builder ubld = bld.exec_all().group(8, 0);
> > +      const fs_builder ubld1 = ubld.group(1, 0);
> > +      ubld.MOV(header, retype(brw_vec8_grf(0, 0),
> BRW_REGISTER_TYPE_UD));
> > +      if (inst->offset) {
> > +         ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
> > +      } else if (bld.shader->stage != MESA_SHADER_VERTEX &&
> > +                 bld.shader->stage != MESA_SHADER_FRAGMENT) {
> > +         /* The vertex and fragment stages have g0.2 set to 0, so
> > +          * header0.2 is 0 when g0 is copied. Other stages may not, so
> we
> > +          * must set it to 0 to avoid setting undesirable bits in the
> > +          * message.
> > +          */
> > +         ubld1.MOV(component(header, 2), brw_imm_ud(0));
> > +      }
> > +
> > +      if (is_high_sampler(devinfo, sampler)) {
> > +         if (sampler.file == BRW_IMMEDIATE_VALUE) {
> > +            assert(sampler.ud >= 16);
> > +            const int sampler_state_size = 16; /* 16 bytes */
> > +
> > +            ubld1.ADD(component(header, 3),
> > +                      retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
> > +                      brw_imm_ud(16 * (sampler.ud / 16) *
> sampler_state_size));
> > +         } else {
> > +            fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
> > +            ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
> > +            ubld1.SHL(tmp, tmp, brw_imm_ud(4));
> > +            ubld1.ADD(component(header, 3),
> > +                      retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
> > +                      tmp);
> > +         }
> > +      }
> >     }
> >
> >     if (shadow_c.file != BAD_FILE) {
> > diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> > index b59c09f..a5a821a 100644
> > --- a/src/intel/compiler/brw_fs_generator.cpp
> > +++ b/src/intel/compiler/brw_fs_generator.cpp
> > @@ -1001,19 +1001,13 @@ fs_generator::generate_tex(fs_inst *inst,
> struct brw_reg dst, struct brw_reg src
> >      * we need to set it up explicitly and load the offset bitfield.
> >      * Otherwise, we can use an implied move from g0 to the first
> message reg.
> >      */
> > -   if (inst->header_size != 0) {
> > +   if (inst->header_size != 0 && devinfo->gen < 7) {
> >        if (devinfo->gen < 6 && !inst->offset) {
> >           /* Set up an implied move from g0 to the MRF. */
> >           src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
> >        } else {
> > -         struct brw_reg header_reg;
> > -
> > -         if (devinfo->gen >= 7) {
> > -            header_reg = src;
> > -         } else {
> > -            assert(inst->base_mrf != -1);
> > -            header_reg = brw_message_reg(inst->base_mrf);
> > -         }
> > +         assert(inst->base_mrf != -1);
> > +         struct brw_reg header_reg = brw_message_reg(inst->base_mrf);
> >
> >           brw_push_insn_state(p);
> >           brw_set_default_exec_size(p, BRW_EXECUTE_8);
> > @@ -1027,17 +1021,8 @@ fs_generator::generate_tex(fs_inst *inst, struct
> brw_reg dst, struct brw_reg src
> >              /* Set the offset bits in DWord 2. */
> >              brw_MOV(p, get_element_ud(header_reg, 2),
> >                         brw_imm_ud(inst->offset));
> > -         } else if (stage != MESA_SHADER_VERTEX &&
> > -                    stage != MESA_SHADER_FRAGMENT) {
> > -            /* The vertex and fragment stages have g0.2 set to 0, so
> > -             * header0.2 is 0 when g0 is copied. Other stages may not,
> so we
> > -             * must set it to 0 to avoid setting undesirable bits in the
> > -             * message.
> > -             */
> > -            brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0));
> >           }
> >
> > -         brw_adjust_sampler_state_pointer(p, header_reg,
> sampler_index);
> >           brw_pop_insn_state(p);
> >        }
> >     }
> > --
> > 2.5.0.400.gff86faf
> >
> > _______________________________________________
> > mesa-stable mailing list
> > mesa-stable at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-stable
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20180301/3b5a4035/attachment-0001.html>


More information about the mesa-stable mailing list