[Mesa-dev] [PATCH 21/21] i965/fs: Expose arbitrary channel execution groups to the IR.

Jason Ekstrand jason at jlekstrand.net
Tue May 24 23:14:41 UTC 2016


On Tue, May 24, 2016 at 12:18 AM, Francisco Jerez <currojerez at riseup.net>
wrote:

> This generalizes the current fs_inst::force_sechalf flag to allow
> specifying channel enable groups other than 0 or 8.  At some point it
> will likely make sense to fix the vec4 generator to support arbitrary
> execution groups and then move the definition of fs_inst::group into
> backend_instruction (e.g. so we can do FP64 in the VEC4 back-end).
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp              | 20
> ++++++++------------
>  src/mesa/drivers/dri/i965/brw_fs_builder.h        | 14 +++++++++++---
>  src/mesa/drivers/dri/i965/brw_fs_cse.cpp          |  4 ++--
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp    |  7 ++++---
>  src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp |  2 +-
>  src/mesa/drivers/dri/i965/brw_ir_fs.h             | 20
> +++++++++-----------
>  6 files changed, 35 insertions(+), 32 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index a59cd3c..92caeaa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication()
>              mul->src[1].stride *= 2;
>
>           } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
> -                    inst->force_sechalf) {
> +                    inst->group > 0) {
>              /* Among other things the quarter control bits influence which
>               * accumulator register is used by the hardware for
> instructions
>               * that access the accumulator implicitly (e.g. MACH).  A
> @@ -3655,7 +3655,7 @@ fs_visitor::lower_integer_multiplication()
>               * to get the result masked correctly according to the current
>               * channel enables.
>               */
> -            mach->force_sechalf = false;
> +            mach->group = 0;
>              mach->force_writemask_all = true;
>              mach->dst = ibld.vgrf(inst->dst.type);
>              ibld.MOV(inst->dst, mach->dst);
> @@ -3791,8 +3791,8 @@ lower_fb_write_logical_send(const fs_builder &bld,
> fs_inst *inst,
>        sample_mask.stride *= 2;
>
>        bld.exec_all().annotate("FB write oMask")
> -         .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
> -                   inst->force_sechalf),
> +         .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),
> +                           inst->group),
>                sample_mask);
>        length++;
>     }
> @@ -5017,10 +5017,10 @@ fs_visitor::lower_simd_width()
>            * execution size of the builder to the highest of both for now
> so
>            * we're sure that both cases can be handled.
>            */
> +         const unsigned max_width = MAX2(inst->exec_size, lower_width);
>           const fs_builder ibld = bld.at(block, inst)
>                                      .exec_all(inst->force_writemask_all)
> -                                    .group(MAX2(inst->exec_size,
> lower_width),
> -                                           inst->force_sechalf);
> +                                    .group(max_width, inst->group /
> max_width);
>
>           /* Split the copies in chunks of the execution width of either
> the
>            * original or the lowered instruction, whichever is lower.
> @@ -5352,12 +5352,8 @@ fs_visitor::dump_instruction(backend_instruction
> *be_inst, FILE *file)
>     if (inst->force_writemask_all)
>        fprintf(file, "NoMask ");
>
> -   if (dispatch_width == 16 && inst->exec_size == 8) {
> -      if (inst->force_sechalf)
> -         fprintf(file, "2ndhalf ");
> -      else
> -         fprintf(file, "1sthalf ");
> -   }
> +   if (inst->exec_size != dispatch_width)
> +      fprintf(file, "group%d ", inst->group);
>
>     fprintf(file, "\n");
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h
> b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> index b50dda4..c1d13a2 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
> @@ -72,7 +72,7 @@ namespace brw {
>        fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
>           shader(shader), block(block), cursor(inst),
>           _dispatch_width(inst->exec_size),
> -         _group(inst->force_sechalf ? 8 : 0),
> +         _group(inst->group),
>           force_writemask_all(inst->force_writemask_all)
>        {
>           annotation.str = inst->annotation;
> @@ -168,6 +168,15 @@ namespace brw {
>        }
>
>        /**
> +       * Get the channel group in use.
> +       */
> +      unsigned
> +      group() const
> +      {
> +         return _group;
> +      }
> +
> +      /**
>         * Allocate a virtual register of natural vector size (one for this
> IR)
>         * and SIMD width.  \p n gives the amount of space to allocate in
>         * dispatch_width units (which is just enough space for one logical
> @@ -353,9 +362,8 @@ namespace brw {
>           assert(inst->exec_size <= 32);
>           assert(inst->exec_size == dispatch_width() ||
>                  force_writemask_all);
> -         assert(_group == 0 || _group == 8);
>
> -         inst->force_sechalf = (_group == 8);
> +         inst->group = _group;
>           inst->force_writemask_all = force_writemask_all;
>           inst->annotation = annotation.str;
>           inst->ir = annotation.ir;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> index 9c39106..159bf5d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> @@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool
> *negate)
>     return a->opcode == b->opcode &&
>            a->force_writemask_all == b->force_writemask_all &&
>            a->exec_size == b->exec_size &&
> -          a->force_sechalf == b->force_sechalf &&
> +          a->group == b->group &&
>            a->saturate == b->saturate &&
>            a->predicate == b->predicate &&
>            a->predicate_inverse == b->predicate_inverse &&
> @@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst
> *inst, fs_reg src, bool negate)
>        copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
>     } else {
>        copy = bld.MOV(inst->dst, src);
> -      copy->force_sechalf = inst->force_sechalf;
> +      copy->group = inst->group;
>        copy->force_writemask_all = inst->force_writemask_all;
>        copy->src[0].negate = negate;
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 804c639..914ec9b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
>     if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
>        msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
>     else if (prog_data->dual_src_blend) {
> -      if (!inst->force_sechalf)
> +      if (!inst->group)
>           msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
>        else
>           msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
>

This doesn't look right.  I suppose it's correct for SIMD16-only and that
you have a follow-on patch for SIMD32 but it seems wrong.  If it's an easy
one-liner, maybe just squash it in?


> @@ -1071,7 +1071,7 @@ fs_generator::generate_scratch_write(fs_inst *inst,
> struct brw_reg src)
>     brw_set_default_compression(p, lower_size > 8);
>
>     for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
> -      brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size
> * i);
> +      brw_set_default_group(p, inst->group + lower_size * i);
>
>        brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
>                retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
> @@ -1615,7 +1615,7 @@ fs_generator::generate_code(const cfg_t *cfg, int
> dispatch_width)
>        const bool compressed =
>             inst->dst.component_size(inst->exec_size) > REG_SIZE;
>        brw_set_default_compression(p, compressed);
> -      brw_set_default_group(p, inst->force_sechalf ? 8 : 0);
> +      brw_set_default_group(p, inst->group);
>
>        for (unsigned int i = 0; i < inst->sources; i++) {
>           src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
> @@ -1643,6 +1643,7 @@ fs_generator::generate_code(const cfg_t *cfg, int
> dispatch_width)
>        brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
>
>        assert(inst->force_writemask_all || inst->exec_size >= 8);
> +      assert(inst->force_writemask_all || inst->group % inst->exec_size
> == 0);
>        assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
>        assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> index 8613725..8cd897f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
> @@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel()
>           /* Check that the MOVs are the right form. */
>           if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
>               then_mov[i]->exec_size != else_mov[i]->exec_size ||
> -             then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
> +             then_mov[i]->group != else_mov[i]->group ||
>               then_mov[i]->force_writemask_all !=
> else_mov[i]->force_writemask_all ||
>               then_mov[i]->is_partial_write() ||
>               else_mov[i]->is_partial_write() ||
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h
> b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> index cdb4464..6a93e81 100644
> --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
> @@ -289,22 +289,20 @@ public:
>      */
>     uint8_t exec_size;
>
> +   /**
> +    * Channel group from the hardware execution and predication mask that
> +    * should be applied to the instruction.  The subset of channel enable
> +    * signals (calculated from the EU control flow and predication state)
> +    * given by [group, group + exec_size[ will be used to mask GRF writes
> and
>

If you're intending to indicate a half-open interval, I believe the
notation more people will be familiar with is "[group, group + exec_size)"


> +    * any other side effects of the instruction.
> +    */
> +   uint8_t group;
> +
>     bool eot:1;
> -   bool force_sechalf:1;
>     bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag
> */
>  };
>
>  /**
> - * Set second-half quarter control on \p inst.
> - */
> -static inline fs_inst *
> -set_sechalf(fs_inst *inst)
> -{
> -   inst->force_sechalf = true;
> -   return inst;
> -}
> -
> -/**
>   * Make the execution of \p inst dependent on the evaluation of a possibly
>   * inverted predicate.
>   */
> --
> 2.7.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160524/12d69743/attachment-0001.html>


More information about the mesa-dev mailing list