[Mesa-dev] [PATCH v2 51/53] intel/compiler: support half-float in the combine constants pass

Mon Jan 7 05:09:37 UTC 2019

On Thu, Jan 03, 2019 at 08:49:48AM +0100, Iago Toral wrote:
> On Wed, 2019-01-02 at 13:02 +0200, Pohjolainen, Topi wrote:
> > On Wed, Dec 19, 2018 at 12:51:19PM +0100, Iago Toral Quiroga wrote:
> > > ---
> > >  .../compiler/brw_fs_combine_constants.cpp     | 60
> > > +++++++++++++++----
> > >  1 file changed, 49 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> > > b/src/intel/compiler/brw_fs_combine_constants.cpp
> > > index e0c95d379b8..24307e365ab 100644
> > > --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> > > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> > > @@ -36,6 +36,7 @@
> > >  
> > >  #include "brw_fs.h"
> > >  #include "brw_cfg.h"
> > > +#include "util/half_float.h"
> > >  
> > >  using namespace brw;
> > >  
> > > @@ -114,8 +115,9 @@ struct imm {
> > >      */
> > >     exec_list *uses;
> > >  
> > > -   /** The immediate value.  We currently only handle floats. */
> > > +   /** The immediate value.  We currently only handle float and
> > > half-float. */
> > >     float val;
> > > +   brw_reg_type type;
> > >  
> > >     /**
> > >      * The GRF register and subregister number where we've decided
> > > to store the
> > > @@ -145,10 +147,10 @@ struct table {
> > >  };
> > >  
> > >  static struct imm *
> > > -find_imm(struct table *table, float val)
> > > +find_imm(struct table *table, float val, brw_reg_type type)
> > >  {
> > >     for (int i = 0; i < table->len; i++) {
> > > -      if (table->imm[i].val == val) {
> > > +      if (table->imm[i].val == val && table->imm[i].type == type)
> > > {
> > >           return &table->imm[i];
> > >        }
> > >     }
> > > @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
> > >     return a->first_use_ip - b->first_use_ip;
> > >  }
> > >  
> > > +static bool
> > > +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> > > +{
> > > +   /* reg_val represents the immediate value in the register in
> > > its original
> > > +    * bit-size, while imm_val is always a valid 32-bit float
> > > value.
> > > +    */
> > > +   if (type == BRW_REGISTER_TYPE_HF) {
> > > +      uint32_t reg_val_ud = *((uint32_t *) &reg_val);
> > 
> > Casting "float" to uint32_t and reading then only 16-bits from it
> > looks a
> > little ugly. Could we use "uint32_t reg_val" and then below in the
> > caller
> > use "reg->u" instead of "reg->f"?
> 
> No, because the sigbit macro used below works on floating point values,
> so if we did that, we would have to do a pointer cast to float before
> we call that macro with reg_val.

That is true, a cast is needed in any case. I just think using uint32_t is a
little cleaner as the "bag of bits"-type that is then interpreted either as
float or float16.

> 
> One thing we can do is to pass the fs_reg instead of reg->f and then
> read reg->f or reg->u as we need inside this function. We would be
> abusing a bit the interface but we would get rid of the casting that
> way... I am not sure if I like it better or worse.
> 
> Alternatively, we could just memcpy reg_val into a uint32_t to avoid
> the pointer cast. Maybe that looks less ugly.
> 
> What do you think?

I guess my concern is passing in a float and then interpreting it as float16.
Like said above I'm more used to having it as uint in such case where one just
takes the lower 16 bits and casts them. But this is really just nitpicking,
lets go with what you have here.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>

> 
> > > +      reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> > > +   }
> > > +
> > > +   return signbit(imm_val) != signbit(reg_val);
> > > +}
> > > +
> > >  bool
> > >  fs_visitor::opt_combine_constants()
> > >  {
> > > @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
> > >  
> > >        for (int i = 0; i < inst->sources; i++) {
> > >           if (inst->src[i].file != IMM ||
> > > -             inst->src[i].type != BRW_REGISTER_TYPE_F)
> > > +             (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> > > +              inst->src[i].type != BRW_REGISTER_TYPE_HF))
> > >              continue;
> > >  
> > > -         float val = !inst->can_do_source_mods(devinfo) ? inst-
> > > >src[i].f :
> > > -                     fabs(inst->src[i].f);
> > > -         struct imm *imm = find_imm(&table, val);
> > > +         float val;
> > > +         if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> > > +            val = !inst->can_do_source_mods(devinfo) ? inst-
> > > >src[i].f :
> > > +                        fabs(inst->src[i].f);
> > > +         } else {
> > > +            val = !inst->can_do_source_mods(devinfo) ?
> > > +               _mesa_half_to_float(inst->src[i].d & 0xffff) :
> > > +               fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> > > +         }
> > > +         struct imm *imm = find_imm(&table, val, inst-
> > > >src[i].type);
> > >  
> > >           if (imm) {
> > >              bblock_t *intersection = cfg_t::intersect(block, imm-
> > > >block);
> > > @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
> > >              imm->uses = new(const_ctx) exec_list();
> > >              imm->uses->push_tail(link(const_ctx, &inst->src[i]));
> > >              imm->val = val;
> > > +            imm->type = inst->src[i].type;
> > >              imm->uses_by_coissue = could_coissue(devinfo, inst);
> > >              imm->must_promote = must_promote_imm(devinfo, inst);
> > >              imm->first_use_ip = ip;
> > > @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
> > >                        imm->block->last_non_control_flow_inst()-
> > > >next);
> > >        const fs_builder ibld = bld.at(imm->block,
> > > n).exec_all().group(1, 0);
> > >  
> > > -      ibld.MOV(reg, brw_imm_f(imm->val));
> > > +      reg = retype(reg, imm->type);
> > > +      if (imm->type == BRW_REGISTER_TYPE_F) {
> > > +         ibld.MOV(reg, brw_imm_f(imm->val));
> > > +      } else {
> > > +         const uint16_t val_hf = _mesa_float_to_half(imm->val);
> > > +         ibld.MOV(reg, retype(brw_imm_uw(val_hf),
> > > BRW_REGISTER_TYPE_HF));
> > > +      }
> > >        imm->nr = reg.nr;
> > >        imm->subreg_offset = reg.offset;
> > >  
> > > +      /* Keep offsets 32-bit aligned since we are mixing 32-bit
> > > and 16-bit
> > > +       * constants into the same register
> > > +       *
> > > +       * TODO: try to pack pairs of HF constants into each 32-bit
> > > slot
> > > +       */
> > >        reg.offset += sizeof(float);
> > > -      if (reg.offset == 8 * sizeof(float)) {
> > > +      if (reg.offset == REG_SIZE) {
> > >           reg.nr = alloc.allocate(1);
> > >           reg.offset = 0;
> > >        }
> > > @@ -296,11 +332,13 @@ fs_visitor::opt_combine_constants()
> > >           fs_reg *reg = link->reg;
> > >           reg->file = VGRF;
> > >           reg->nr = table.imm[i].nr;
> > > +         reg->type = table.imm[i].type;
> > >           reg->offset = table.imm[i].subreg_offset;
> > >           reg->stride = 0;
> > > -         reg->negate = signbit(reg->f) !=
> > > signbit(table.imm[i].val);
> > > +         reg->negate = needs_negate(reg->f, table.imm[i].val,
> > > table.imm[i].type);
> > >           assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
> > > -                fabsf(reg->f) == fabs(table.imm[i].val));
> > > +                fabsf(reg->f) == fabs(table.imm[i].val) ||
> > > +                table.imm[i].type == BRW_REGISTER_TYPE_HF);
> > >        }
> > >     }
> > >  
> > > -- 
> > > 2.17.1
> > > 
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev at lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > 
> > 
>