[Mesa-dev] [PATCH v3 40/42] intel/compiler: support half-float in the combine constants pass

Fri Jan 18 11:35:31 UTC 2019

On Thu, 2019-01-17 at 18:18 -0600, Jason Ekstrand wrote:
> On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga <itoral at igalia.com
> > wrote:
> > Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> > 
> > ---
> > 
> >  .../compiler/brw_fs_combine_constants.cpp     | 60
> > +++++++++++++++----
> > 
> >  1 file changed, 49 insertions(+), 11 deletions(-)
> > 
> > 
> > 
> > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> > b/src/intel/compiler/brw_fs_combine_constants.cpp
> > 
> > index 7343f77bb45..54017e5668b 100644
> > 
> > --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> > 
> > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> > 
> > @@ -36,6 +36,7 @@
> > 
> > 
> > 
> >  #include "brw_fs.h"
> > 
> >  #include "brw_cfg.h"
> > 
> > +#include "util/half_float.h"
> > 
> > 
> > 
> >  using namespace brw;
> > 
> > 
> > 
> > @@ -114,8 +115,9 @@ struct imm {
> > 
> >      */
> > 
> >     exec_list *uses;
> > 
> > 
> > 
> > -   /** The immediate value.  We currently only handle floats. */
> > 
> > +   /** The immediate value.  We currently only handle float and
> > half-float. */
> > 
> >     float val;
> > 
> > +   brw_reg_type type;
> 
> I had a brief chat with Matt today and I think that this may be going
> in the wrong direction.  In particular, I'd like us to eventually
> (maybe we can do it now?) generalize the combine_constants pass to
> more data types; in particular, integers.  I recently came across a
> shader where the fact that we couldn't do combine_constants on
> integers was causing significant register pressure problems and
> spilling.  (The test was doing a bunch of BFI2/BFE with constant
> sources.)  It could also be a huge win for 8-bit and 64-bit where we
> can't put immediates in regular 2-src instructions.
> What does this mean for the pass?  I suspect we want a bit size
> instead of a type and a simple char[8] for the data and just make it
> a blob of bits.  We may also want some sort of heuristic so we don't
> burn constant table space for things that are only used once or maybe
> even twice.
> 
> Normally, I would say "do it as a fixup" but if we go the direction
> of having a float and using _mesa_half_to_float and
> _mesa_float_to_half, I suspect it'll be harder to go for the bag-of-
> bits approach.
> 
> Thoughts?

Fair enough, I see the value in having this support more than just F
and HF. I'll think a bit about it and I'll send a different version
that tries to cover these cases as well.
> --Jason
>  
> > 
> >     /**
> > 
> >      * The GRF register and subregister number where we've decided
> > to store the
> > 
> > @@ -145,10 +147,10 @@ struct table {
> > 
> >  };
> > 
> > 
> > 
> >  static struct imm *
> > 
> > -find_imm(struct table *table, float val)
> > 
> > +find_imm(struct table *table, float val, brw_reg_type type)
> > 
> >  {
> > 
> >     for (int i = 0; i < table->len; i++) {
> > 
> > -      if (table->imm[i].val == val) {
> > 
> > +      if (table->imm[i].val == val && table->imm[i].type == type)
> > {
> > 
> >           return &table->imm[i];
> > 
> >        }
> > 
> >     }
> > 
> > @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
> > 
> >     return a->first_use_ip - b->first_use_ip;
> > 
> >  }
> > 
> > 
> > 
> > +static bool
> > 
> > +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> > 
> > +{
> > 
> > +   /* reg_val represents the immediate value in the register in
> > its original
> > 
> > +    * bit-size, while imm_val is always a valid 32-bit float
> > value.
> > 
> > +    */
> > 
> > +   if (type == BRW_REGISTER_TYPE_HF) {
> > 
> > +      uint32_t reg_val_ud = *((uint32_t *) &reg_val);
> > 
> > +      reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> > 
> > +   }
> > 
> > +
> > 
> > +   return signbit(imm_val) != signbit(reg_val);
> > 
> > +}
> > 
> > +
> > 
> >  bool
> > 
> >  fs_visitor::opt_combine_constants()
> > 
> >  {
> > 
> > @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
> > 
> > 
> > 
> >        for (int i = 0; i < inst->sources; i++) {
> > 
> >           if (inst->src[i].file != IMM ||
> > 
> > -             inst->src[i].type != BRW_REGISTER_TYPE_F)
> > 
> > +             (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> > 
> > +              inst->src[i].type != BRW_REGISTER_TYPE_HF))
> > 
> >              continue;
> > 
> > 
> > 
> > -         float val = !inst->can_do_source_mods(devinfo) ? inst-
> > >src[i].f :
> > 
> > -                     fabs(inst->src[i].f);
> > 
> > -         struct imm *imm = find_imm(&table, val);
> > 
> > +         float val;
> > 
> > +         if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> > 
> > +            val = !inst->can_do_source_mods(devinfo) ? inst-
> > >src[i].f :
> > 
> > +                        fabs(inst->src[i].f);
> > 
> > +         } else {
> > 
> > +            val = !inst->can_do_source_mods(devinfo) ?
> > 
> > +               _mesa_half_to_float(inst->src[i].d & 0xffff) :
> > 
> > +               fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> > 
> > +         }
> > 
> > +         struct imm *imm = find_imm(&table, val, inst-
> > >src[i].type);
> > 
> > 
> > 
> >           if (imm) {
> > 
> >              bblock_t *intersection = cfg_t::intersect(block, imm-
> > >block);
> > 
> > @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
> > 
> >              imm->uses = new(const_ctx) exec_list();
> > 
> >              imm->uses->push_tail(link(const_ctx, &inst->src[i]));
> > 
> >              imm->val = val;
> > 
> > +            imm->type = inst->src[i].type;
> > 
> >              imm->uses_by_coissue = could_coissue(devinfo, inst);
> > 
> >              imm->must_promote = must_promote_imm(devinfo, inst);
> > 
> >              imm->first_use_ip = ip;
> > 
> > @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
> > 
> >                        imm->block->last_non_control_flow_inst()-
> > >next);
> > 
> >        const fs_builder ibld = bld.at(imm->block,
> > n).exec_all().group(1, 0);
> > 
> > 
> > 
> > -      ibld.MOV(reg, brw_imm_f(imm->val));
> > 
> > +      reg = retype(reg, imm->type);
> > 
> > +      if (imm->type == BRW_REGISTER_TYPE_F) {
> > 
> > +         ibld.MOV(reg, brw_imm_f(imm->val));
> > 
> > +      } else {
> > 
> > +         const uint16_t val_hf = _mesa_float_to_half(imm->val);
> > 
> > +         ibld.MOV(reg, retype(brw_imm_uw(val_hf),
> > BRW_REGISTER_TYPE_HF));
> > 
> > +      }
> > 
> >        imm->nr = reg.nr;
> > 
> >        imm->subreg_offset = reg.offset;
> > 
> > 
> > 
> > +      /* Keep offsets 32-bit aligned since we are mixing 32-bit
> > and 16-bit
> > 
> > +       * constants into the same register
> > 
> > +       *
> > 
> > +       * TODO: try to pack pairs of HF constants into each 32-bit
> > slot
> > 
> > +       */
> > 
> >        reg.offset += sizeof(float);
> > 
> > -      if (reg.offset == 8 * sizeof(float)) {
> > 
> > +      if (reg.offset == REG_SIZE) {
> > 
> >           reg.nr = alloc.allocate(1);
> > 
> >           reg.offset = 0;
> > 
> >        }
> > 
> > @@ -295,12 +331,14 @@ fs_visitor::opt_combine_constants()
> > 
> >        foreach_list_typed(reg_link, link, link, table.imm[i].uses)
> > {
> > 
> >           fs_reg *reg = link->reg;
> > 
> >           assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
> > 
> > -                fabsf(reg->f) == fabs(table.imm[i].val));
> > 
> > +                fabsf(reg->f) == fabs(table.imm[i].val) ||
> > 
> > +                table.imm[i].type == BRW_REGISTER_TYPE_HF);
> > 
> > 
> > 
> >           reg->file = VGRF;
> > 
> > +         reg->type = table.imm[i].type;
> > 
> >           reg->offset = table.imm[i].subreg_offset;
> > 
> >           reg->stride = 0;
> > 
> > -         reg->negate = signbit(reg->f) !=
> > signbit(table.imm[i].val);
> > 
> > +         reg->negate = needs_negate(reg->f, table.imm[i].val,
> > table.imm[i].type);
> > 
> >           reg->nr = table.imm[i].nr;
> > 
> >        }
> > 
> >     }
> > 
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190118/78caf906/attachment-0001.html>