[Mesa-dev] [PATCH v3 40/42] intel/compiler: support half-float in the combine constants pass
Jason Ekstrand
jason at jlekstrand.net
Fri Jan 18 00:18:43 UTC 2019
On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga <itoral at igalia.com>
wrote:
> Reviewed-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
> ---
> .../compiler/brw_fs_combine_constants.cpp | 60 +++++++++++++++----
> 1 file changed, 49 insertions(+), 11 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> b/src/intel/compiler/brw_fs_combine_constants.cpp
> index 7343f77bb45..54017e5668b 100644
> --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> @@ -36,6 +36,7 @@
>
> #include "brw_fs.h"
> #include "brw_cfg.h"
> +#include "util/half_float.h"
>
> using namespace brw;
>
> @@ -114,8 +115,9 @@ struct imm {
> */
> exec_list *uses;
>
> - /** The immediate value. We currently only handle floats. */
> + /** The immediate value. We currently only handle float and
> half-float. */
> float val;
> + brw_reg_type type;
>
I had a brief chat with Matt today and I think that this may be going in
the wrong direction. In particular, I'd like us to eventually (maybe we
can do it now?) generalize the combine_constants pass to more data types;
in particular, integers. I recently came across a shader where the fact
that we couldn't do combine_constants on integers was causing significant
register pressure problems and spilling. (The test was doing a bunch of
BFI2/BFE with constant sources.) It could also be a huge win for 8-bit and
64-bit where we can't put immediates in regular 2-src instructions.
What does this mean for the pass? I suspect we want a bit size instead of
a type and a simple char[8] for the data and just make it a blob of bits.
We may also want some sort of heuristic so we don't burn constant table
space for things that are only used once or maybe even twice.
Normally, I would say "do it as a fixup" but if we go the direction of
having a float and using _mesa_half_to_float and _mesa_float_to_half, I
suspect it'll be harder to go for the bag-of-bits approach.
Thoughts?
--Jason
>
> /**
> * The GRF register and subregister number where we've decided to
> store the
> @@ -145,10 +147,10 @@ struct table {
> };
>
> static struct imm *
> -find_imm(struct table *table, float val)
> +find_imm(struct table *table, float val, brw_reg_type type)
> {
> for (int i = 0; i < table->len; i++) {
> - if (table->imm[i].val == val) {
> + if (table->imm[i].val == val && table->imm[i].type == type) {
> return &table->imm[i];
> }
> }
> @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)
> return a->first_use_ip - b->first_use_ip;
> }
>
> +static bool
> +needs_negate(float reg_val, float imm_val, brw_reg_type type)
> +{
> + /* reg_val represents the immediate value in the register in its
> original
> + * bit-size, while imm_val is always a valid 32-bit float value.
> + */
> + if (type == BRW_REGISTER_TYPE_HF) {
> + uint32_t reg_val_ud = *((uint32_t *) ®_val);
> + reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);
> + }
> +
> + return signbit(imm_val) != signbit(reg_val);
> +}
> +
> bool
> fs_visitor::opt_combine_constants()
> {
> @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()
>
> for (int i = 0; i < inst->sources; i++) {
> if (inst->src[i].file != IMM ||
> - inst->src[i].type != BRW_REGISTER_TYPE_F)
> + (inst->src[i].type != BRW_REGISTER_TYPE_F &&
> + inst->src[i].type != BRW_REGISTER_TYPE_HF))
> continue;
>
> - float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> - fabs(inst->src[i].f);
> - struct imm *imm = find_imm(&table, val);
> + float val;
> + if (inst->src[i].type == BRW_REGISTER_TYPE_F) {
> + val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> + fabs(inst->src[i].f);
> + } else {
> + val = !inst->can_do_source_mods(devinfo) ?
> + _mesa_half_to_float(inst->src[i].d & 0xffff) :
> + fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));
> + }
> + struct imm *imm = find_imm(&table, val, inst->src[i].type);
>
> if (imm) {
> bblock_t *intersection = cfg_t::intersect(block, imm->block);
> @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()
> imm->uses = new(const_ctx) exec_list();
> imm->uses->push_tail(link(const_ctx, &inst->src[i]));
> imm->val = val;
> + imm->type = inst->src[i].type;
> imm->uses_by_coissue = could_coissue(devinfo, inst);
> imm->must_promote = must_promote_imm(devinfo, inst);
> imm->first_use_ip = ip;
> @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()
> imm->block->last_non_control_flow_inst()->next);
> const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1,
> 0);
>
> - ibld.MOV(reg, brw_imm_f(imm->val));
> + reg = retype(reg, imm->type);
> + if (imm->type == BRW_REGISTER_TYPE_F) {
> + ibld.MOV(reg, brw_imm_f(imm->val));
> + } else {
> + const uint16_t val_hf = _mesa_float_to_half(imm->val);
> + ibld.MOV(reg, retype(brw_imm_uw(val_hf), BRW_REGISTER_TYPE_HF));
> + }
> imm->nr = reg.nr;
> imm->subreg_offset = reg.offset;
>
> + /* Keep offsets 32-bit aligned since we are mixing 32-bit and 16-bit
> + * constants into the same register
> + *
> + * TODO: try to pack pairs of HF constants into each 32-bit slot
> + */
> reg.offset += sizeof(float);
> - if (reg.offset == 8 * sizeof(float)) {
> + if (reg.offset == REG_SIZE) {
> reg.nr = alloc.allocate(1);
> reg.offset = 0;
> }
> @@ -295,12 +331,14 @@ fs_visitor::opt_combine_constants()
> foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
> fs_reg *reg = link->reg;
> assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
> - fabsf(reg->f) == fabs(table.imm[i].val));
> + fabsf(reg->f) == fabs(table.imm[i].val) ||
> + table.imm[i].type == BRW_REGISTER_TYPE_HF);
>
> reg->file = VGRF;
> + reg->type = table.imm[i].type;
> reg->offset = table.imm[i].subreg_offset;
> reg->stride = 0;
> - reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
> + reg->negate = needs_negate(reg->f, table.imm[i].val,
> table.imm[i].type);
> reg->nr = table.imm[i].nr;
> }
> }
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190117/dda7e2e3/attachment-0001.html>
More information about the mesa-dev
mailing list