[Mesa-dev] [PATCH v4 27/40] intel/compiler: generalize the combine constants pass
Jason Ekstrand
jason at jlekstrand.net
Sat Feb 16 15:29:00 UTC 2019
On Tue, Feb 12, 2019 at 5:57 AM Iago Toral Quiroga <itoral at igalia.com>
wrote:
> At the very least we need it to handle HF too, since we are doing
> constant propagation for MAD and LRP, which relies on this pass
> to promote the immediates to GRF in the end, but ideally
> we want it to support even more types so we can take advantage
> of it to improve register pressure in some scenarios.
> ---
> .../compiler/brw_fs_combine_constants.cpp | 202 ++++++++++++++++--
> 1 file changed, 180 insertions(+), 22 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp
> b/src/intel/compiler/brw_fs_combine_constants.cpp
> index 7343f77bb45..5d79f1a0826 100644
> --- a/src/intel/compiler/brw_fs_combine_constants.cpp
> +++ b/src/intel/compiler/brw_fs_combine_constants.cpp
> @@ -36,6 +36,7 @@
>
> #include "brw_fs.h"
> #include "brw_cfg.h"
> +#include "util/half_float.h"
>
> using namespace brw;
>
> @@ -114,8 +115,17 @@ struct imm {
> */
> exec_list *uses;
>
> - /** The immediate value. We currently only handle floats. */
> - float val;
> + /** The immediate value */
> + union {
> + char bytes[8];
> + float f;
> + int32_t d;
> + int16_t w;
> + };
> + uint8_t size;
> +
> + /** When promoting half-float we need to account for certain
> restrictions */
> + bool is_half_float;
>
> /**
> * The GRF register and subregister number where we've decided to
> store the
> @@ -145,10 +155,11 @@ struct table {
> };
>
> static struct imm *
> -find_imm(struct table *table, float val)
> +find_imm(struct table *table, void *data, uint8_t size)
> {
> for (int i = 0; i < table->len; i++) {
> - if (table->imm[i].val == val) {
> + if (table->imm[i].size == size &&
> + !memcmp(table->imm[i].bytes, data, size)) {
> return &table->imm[i];
> }
> }
> @@ -190,6 +201,96 @@ compare(const void *_a, const void *_b)
> return a->first_use_ip - b->first_use_ip;
> }
>
> +static bool
> +get_constant_value(const struct gen_device_info *devinfo,
> + const fs_inst *inst, uint32_t src_idx,
> + void *out, brw_reg_type *out_type)
> +{
> + const bool can_do_source_mods = inst->can_do_source_mods(devinfo);
> + const fs_reg *src = &inst->src[src_idx];
> +
> + *out_type = src->type;
> +
> + switch (*out_type) {
> + case BRW_REGISTER_TYPE_F: {
> + float val = !can_do_source_mods ? src->f : fabsf(src->f);
> + memcpy(out, &val, 4);
> + break;
> + }
> + case BRW_REGISTER_TYPE_HF: {
> + uint16_t val = src->d & 0xffffu;
> + if (can_do_source_mods)
> + val = _mesa_float_to_half(fabsf(_mesa_half_to_float(val)));
> + memcpy(out, &val, 2);
> + break;
> + }
> + case BRW_REGISTER_TYPE_D: {
> + int32_t val = !can_do_source_mods ? src->d : abs(src->d);
> + memcpy(out, &val, 4);
> + break;
> + }
> + case BRW_REGISTER_TYPE_UD:
> + memcpy(out, &src->ud, 4);
> + break;
> + case BRW_REGISTER_TYPE_W: {
> + int16_t val = src->d & 0xffffu;
> + if (can_do_source_mods)
> + val = abs(val);
> + memcpy(out, &val, 2);
> + break;
> + }
> + case BRW_REGISTER_TYPE_UW:
> + memcpy(out, &src->ud, 2);
> + break;
>
You could also throw in DF and Q types. This is probably sufficient for
now though.
> + default:
> + return false;
> + };
> +
> + return true;
> +}
> +
> +static struct brw_reg
> +build_imm_reg_for_copy(struct imm *imm)
> +{
> + switch (imm->size) {
> + case 4:
> + return brw_imm_d(imm->d);
> + case 2:
> + return brw_imm_w(imm->w);
> + default:
> + unreachable("not implemented");
> + }
> +}
> +
> +static inline uint32_t
> +get_alignment_for_imm(const struct imm *imm)
> +{
> + if (imm->is_half_float)
> + return 4; /* At least MAD seems to require this */
> + else
> + return imm->size;
> +}
> +
> +static bool
> +needs_negate(const struct fs_reg *reg, const struct imm *imm)
> +{
> + switch (reg->type) {
> + case BRW_REGISTER_TYPE_F:
> + return signbit(reg->f) != signbit(imm->f);
> + case BRW_REGISTER_TYPE_D:
> + return (reg->d < 0) != (imm->d < 0);
> + case BRW_REGISTER_TYPE_HF:
> + return (reg->d & 0x8000u) != (imm->w & 0x8000u);
> + case BRW_REGISTER_TYPE_W:
> + return ((reg->d & 0xffffu) < 0) != (imm->w < 0);
> + case BRW_REGISTER_TYPE_UD:
> + case BRW_REGISTER_TYPE_UW:
> + return false;
> + default:
> + unreachable("not implemented");
> + };
> +}
> +
> bool
> fs_visitor::opt_combine_constants()
> {
> @@ -214,13 +315,17 @@ fs_visitor::opt_combine_constants()
> continue;
>
> for (int i = 0; i < inst->sources; i++) {
> - if (inst->src[i].file != IMM ||
> - inst->src[i].type != BRW_REGISTER_TYPE_F)
> + if (inst->src[i].file != IMM)
> continue;
>
> - float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
> - fabs(inst->src[i].f);
> - struct imm *imm = find_imm(&table, val);
> + char data[8];
> + brw_reg_type type;
> + if (!get_constant_value(devinfo, inst, i, data, &type))
> + continue;
> +
> + uint8_t size = type_sz(type);
> +
> + struct imm *imm = find_imm(&table, data, size);
>
> if (imm) {
> bblock_t *intersection = cfg_t::intersect(block, imm->block);
> @@ -237,7 +342,9 @@ fs_visitor::opt_combine_constants()
> imm->inst = inst;
> imm->uses = new(const_ctx) exec_list();
> imm->uses->push_tail(link(const_ctx, &inst->src[i]));
> - imm->val = val;
> + memcpy(imm->bytes, data, size);
> + imm->size = size;
> + imm->is_half_float = type == BRW_REGISTER_TYPE_HF;
>
In the "if (imm)" case above, I think we want to do
if (type == BRW_REGISTER_TYPE_HF)
imm->is_half_float = true;
in case we get a constant that's used first as a uint16_t (which won't set
is_half_float) and then later as a float16_t.
> imm->uses_by_coissue = could_coissue(devinfo, inst);
> imm->must_promote = must_promote_imm(devinfo, inst);
> imm->first_use_ip = ip;
> @@ -276,17 +383,40 @@ fs_visitor::opt_combine_constants()
> */
> exec_node *n = (imm->inst ? imm->inst :
> imm->block->last_non_control_flow_inst()->next);
> - const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1,
> 0);
>
> - ibld.MOV(reg, brw_imm_f(imm->val));
> - imm->nr = reg.nr;
> - imm->subreg_offset = reg.offset;
> + /* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions:
> + *
> + * "In Align16 mode, the channel selects and channel enables
> apply to a
> + * pair of half-floats, because these parameters are defined for
> DWord
> + * elements ONLY. This is applicable when both source and
> destination
> + * are half-floats."
> + *
> + * This means that Align16 instructions that use promoted HF
> immediates
> + * and use a <0,1,0>:HF region would read 2 HF slots instead of
> + * replicating the single one we want. To avoid this, we always
> populate
> + * both HF slots within a DWord with the constant.
> + */
> + const uint32_t width = devinfo->gen == 8 && imm->is_half_float ? 2
> : 1;
> + const fs_builder ibld = bld.at(imm->block,
> n).exec_all().group(width, 0);
> +
> + /* Put the immediate in an offset aligned to its size. Some
> instructions
> + * seem to have additional alignment requirements, so account for
> that
> + * too.
> + */
> + reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));
>
> - reg.offset += sizeof(float);
> - if (reg.offset == 8 * sizeof(float)) {
> + /* Ensure we have enough space in the register to copy the
> immediate */
> + struct brw_reg imm_reg = build_imm_reg_for_copy(imm);
> + if (reg.offset + type_sz(imm_reg.type) > REG_SIZE) {
>
I think you need to multiply by width here as well.
> reg.nr = alloc.allocate(1);
> reg.offset = 0;
> }
> +
> + ibld.MOV(retype(reg, imm_reg.type), imm_reg);
> + imm->nr = reg.nr;
> + imm->subreg_offset = reg.offset;
> +
> + reg.offset += imm->size;
>
Multiply by width.
You can ignore the comment about DF and Q types if you'd like. With all
the others addressed, this is
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
> }
> promoted_constants = table.len;
>
> @@ -294,13 +424,41 @@ fs_visitor::opt_combine_constants()
> for (int i = 0; i < table.len; i++) {
> foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
> fs_reg *reg = link->reg;
> - assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
> - fabsf(reg->f) == fabs(table.imm[i].val));
> +#ifdef DEBUG
> + switch (reg->type) {
> + case BRW_REGISTER_TYPE_F:
> + assert((isnan(reg->f) && isnan(table.imm[i].f)) ||
> + (fabsf(reg->f) == fabsf(table.imm[i].f)));
> + break;
> + case BRW_REGISTER_TYPE_HF:
> + assert((isnan(_mesa_half_to_float(reg->d & 0xffffu)) &&
> + isnan(_mesa_half_to_float(table.imm[i].w))) ||
> + (fabsf(_mesa_half_to_float(reg->d & 0xffffu)) ==
> + fabsf(_mesa_half_to_float(table.imm[i].w))));
> + break;
> + case BRW_REGISTER_TYPE_D:
> + assert(reg->type == BRW_REGISTER_TYPE_D &&
> + abs(reg->d) == abs(table.imm[i].d));
> + break;
> + case BRW_REGISTER_TYPE_UD:
> + assert(reg->d == table.imm[i].d);
> + break;
> + case BRW_REGISTER_TYPE_W:
> + assert(abs((int16_t) (reg->d & 0xffff)) == table.imm[i].w);
> + break;
> + case BRW_REGISTER_TYPE_UW:
> + assert(reg->type == BRW_REGISTER_TYPE_UW &&
> + (reg->ud & 0xffffu) == (uint16_t) table.imm[i].w);
> + break;
> + default:
> + break;
> + }
> +#endif
>
> reg->file = VGRF;
> reg->offset = table.imm[i].subreg_offset;
> reg->stride = 0;
> - reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
> + reg->negate = needs_negate(reg, &table.imm[i]);
> reg->nr = table.imm[i].nr;
> }
> }
> @@ -309,9 +467,9 @@ fs_visitor::opt_combine_constants()
> for (int i = 0; i < table.len; i++) {
> struct imm *imm = &table.imm[i];
>
> - printf("%.3fF - block %3d, reg %3d sub %2d, Uses: (%2d, %2d), "
> - "IP: %4d to %4d, length %4d\n",
> - imm->val,
> + printf("0x%016" PRIx64 " - block %3d, reg %3d sub %2d, "
> + "Uses: (%2d, %2d), IP: %4d to %4d, length %4d\n",
> + (uint64_t)(imm->d & BITFIELD64_MASK(imm->size * 8)),
> imm->block->num,
> imm->nr,
> imm->subreg_offset,
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190216/9fa78753/attachment-0001.html>
More information about the mesa-dev
mailing list