[Mesa-dev] [PATCH 15/53] intel/fs: Set up FB write message headers in the visitor

Sat May 26 18:27:28 UTC 2018

Jason Ekstrand <jason at jlekstrand.net> writes:

> Doing instruction header setup in the generator is aweful for a number
> of reasons.  For one, we can't schedule the header setup at all.  For
> another, it means lots of implied writes which the instruction scheduler
> and other passes can't properly read about.  The second isn't a huge
> problem for FB writes since they always happen at the end.  We made a
> similar change to sampler handling in ff4726077d86.
> ---
>  src/intel/compiler/brw_fs.cpp           | 103 ++++++++++++++++++++++++++------
>  src/intel/compiler/brw_fs_generator.cpp |  66 --------------------
>  2 files changed, 86 insertions(+), 83 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 3e9ccc4..5276a66 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -3247,7 +3247,18 @@ fs_visitor::emit_repclear_shader()
>        write->mlen = 1;
>     } else {
>        assume(key->nr_color_regions > 0);
> +
> +      struct brw_reg header =
> +         retype(brw_message_reg(base_mrf), BRW_REGISTER_TYPE_UD);
> +      bld.exec_all().group(16, 0)
> +         .MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
> +
>        for (int i = 0; i < key->nr_color_regions; ++i) {
> +         if (i > 0) {
> +            bld.exec_all().group(1, 0)
> +               .MOV(component(header, 2), brw_imm_ud(i));
> +         }
> +
>           write = bld.emit(FS_OPCODE_REP_FB_WRITE);
>           write->saturate = key->clamp_fragment_color;
>           write->base_mrf = base_mrf;
> @@ -3972,25 +3983,83 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
>     int header_size = 2, payload_header_size;
>     unsigned length = 0;
>  
> -   /* From the Sandy Bridge PRM, volume 4, page 198:
> -    *
> -    *     "Dispatched Pixel Enables. One bit per pixel indicating
> -    *      which pixels were originally enabled when the thread was
> -    *      dispatched. This field is only required for the end-of-
> -    *      thread message and on all dual-source messages."
> -    */
> -   if (devinfo->gen >= 6 &&
> -       (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) &&
> -       color1.file == BAD_FILE &&
> -       key->nr_color_regions == 1) {
> -      header_size = 0;
> -   }
> +   if (devinfo->gen < 6) {
> +      /* For gen4-5, we always have a header consisting of g0 and g1.  We have
> +       * an implied MOV from g0,g1 to the start of the message.  The MOV from
> +       * g0 is handled by the hardware and the MOV from g1 is provided by the
> +       * generator.  This is required because, on gen4-5, the generator may
> +       * generate two write messages with different message lengths in order
> +       * to handle AA data properly.
> +       *
> +       * Also, since the pixel mask goes in the g0 portion of the message and
> +       * since render target writes are the last thing in the shader, we write
> +       * the pixel mask directly into g0 and it will get copied as part of the
> +       * implied write.
> +       */
> +      if (prog_data->uses_kill) {
> +         bld.exec_all().group(1, 0)
> +            .MOV(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW),
> +                 brw_flag_reg(0, 1));
> +      }
> +
> +      assert(length == 0);
> +      length = 2;
> +   } else if ((devinfo->gen <= 7 && !devinfo->is_haswell &&
> +               prog_data->uses_kill) ||
> +              color1.file != BAD_FILE ||
> +              key->nr_color_regions > 1) {
> +      /* From the Sandy Bridge PRM, volume 4, page 198:
> +       *
> +       *     "Dispatched Pixel Enables. One bit per pixel indicating
> +       *      which pixels were originally enabled when the thread was
> +       *      dispatched. This field is only required for the end-of-
> +       *      thread message and on all dual-source messages."
> +       */
> +      const fs_builder ubld = bld.exec_all().group(8, 0);
> +
> +      /* The header starts off as g0 and g1 */
> +      fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> +      ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0),
> +                                           BRW_REGISTER_TYPE_UD));
> +
> +      uint32_t g00_bits = 0;
> +
> +      /* Set "Source0 Alpha Present to RenderTarget" bit in message
> +       * header.
> +       */
> +      if (inst->target > 0 && key->replicate_alpha)
> +         g00_bits |= 1 << 11;
> +
> +      /* Set computes stencil to render target */
> +      if (prog_data->computed_stencil)
> +         g00_bits |= 1 << 14;
> +
> +      if (g00_bits) {
> +         /* OR extra bits into g0.0 */
> +         ubld.group(1, 0).OR(component(header, 0),
> +                             retype(brw_vec1_grf(0, 0),
> +                                    BRW_REGISTER_TYPE_UD),
> +                             brw_imm_ud(g00_bits));
> +      }
> +
> +      /* Set the render target index for choosing BLEND_STATE. */
> +      if (inst->target > 0) {
> +         ubld.group(1, 0).MOV(component(header, 2), brw_imm_ud(inst->target));
> +      }
> +
> +      if (prog_data->uses_kill) {
> +         ubld.group(1, 0).MOV(retype(component(header, 15),
> +                                     BRW_REGISTER_TYPE_UW),
> +                              brw_flag_reg(0, 1));
> +      }

The original patch from my branch implemented the header setup in a
helper function called setup_fb_write_header() which allowed it to share
the code among emit_repclear_shader() and lower_fb_write_logical_send().
I don't think there is any benefit from open-coding the header setup in
the already complex enough callers, seems like an obfuscation to me
(particularly of the emit_repclear_shader() codepath with PATCH 52 in
mind).

>  
> -   if (header_size != 0) {
> -      assert(header_size == 2);
> -      /* Allocate 2 registers for a header */
> -      length += 2;
> +      assert(length == 0);
> +      sources[0] = header;
> +      sources[1] = horiz_offset(header, 8);
> +      length = 2;
>     }
> +   assert(length == 0 || length == 2);
> +   header_size = length;
>  
>     if (payload.aa_dest_stencil_reg) {
>        sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
> diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
> index 8cc9d31..1a2b961 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -307,9 +307,6 @@ fs_generator::fire_fb_write(fs_inst *inst,
>  void
>  fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
>  {
> -   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
> -   const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key;
> -
>     if (devinfo->gen < 8 && !devinfo->is_haswell) {
>        brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
>     }
> @@ -320,69 +317,6 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
>     if (inst->base_mrf >= 0)
>        payload = brw_message_reg(inst->base_mrf);
>  
> -   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
> -    * move, here's g1.
> -    */
> -   if (inst->header_size != 0) {
> -      brw_push_insn_state(p);
> -      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> -      brw_set_default_exec_size(p, BRW_EXECUTE_1);
> -      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> -      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
> -      brw_set_default_flag_reg(p, 0, 0);
> -
> -      /* On HSW, the GPU will use the predicate on SENDC, unless the header is
> -       * present.
> -       */
> -      if (prog_data->uses_kill) {
> -         struct brw_reg pixel_mask;
> -
> -         if (devinfo->gen >= 6)
> -            pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
> -         else
> -            pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
> -
> -         brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
> -      }
> -
> -      if (devinfo->gen >= 6) {
> -         brw_push_insn_state(p);
> -         brw_set_default_exec_size(p, BRW_EXECUTE_16);
> -	 brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
> -	 brw_MOV(p,
> -		 retype(payload, BRW_REGISTER_TYPE_UD),
> -		 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
> -         brw_pop_insn_state(p);
> -
> -         if (inst->target > 0 && key->replicate_alpha) {
> -            /* Set "Source0 Alpha Present to RenderTarget" bit in message
> -             * header.
> -             */
> -            brw_OR(p,
> -		   vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
> -		   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
> -		   brw_imm_ud(0x1 << 11));
> -         }
> -
> -	 if (inst->target > 0) {
> -	    /* Set the render target index for choosing BLEND_STATE. */
> -	    brw_MOV(p, retype(vec1(suboffset(payload, 2)),
> -                              BRW_REGISTER_TYPE_UD),
> -		    brw_imm_ud(inst->target));
> -	 }
> -
> -         /* Set computes stencil to render target */
> -         if (prog_data->computed_stencil) {
> -            brw_OR(p,
> -                   vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
> -                   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
> -                   brw_imm_ud(0x1 << 14));
> -         }
> -      }
> -
> -      brw_pop_insn_state(p);
> -   }
> -
>     if (!runtime_check_aads_emit) {
>        fire_fb_write(inst, payload, implied_header, inst->mlen);
>     } else {
> -- 
> 2.5.0.400.gff86faf
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 227 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180526/f551cf32/attachment.sig>