<html dir="ltr"><head></head><body style="text-align:left; direction:ltr;"><div>On Sat, 2019-02-16 at 09:29 -0600, Jason Ekstrand wrote:</div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Feb 12, 2019 at 5:57 AM Iago Toral Quiroga <<a href="mailto:itoral@igalia.com">itoral@igalia.com</a>> wrote:<br></div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex">At the very least we need it to handle HF too, since we are doing<br>
constant propagation for MAD and LRP, which relies on this pass<br>
to promote the immediates to GRF in the end, but ideally<br>
we want it to support even more types so we can take advantage<br>
of it to improve register pressure in some scenarios.<br>
---<br>
.../compiler/brw_fs_combine_constants.cpp | 202 ++++++++++++++++--<br>
1 file changed, 180 insertions(+), 22 deletions(-)<br>
<br>
diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp<br>
index 7343f77bb45..5d79f1a0826 100644<br>
--- a/src/intel/compiler/brw_fs_combine_constants.cpp<br>
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp<br>
@@ -36,6 +36,7 @@<br>
<br>
#include "brw_fs.h"<br>
#include "brw_cfg.h"<br>
+#include "util/half_float.h"<br>
<br>
using namespace brw;<br>
<br>
@@ -114,8 +115,17 @@ struct imm {<br>
*/<br>
exec_list *uses;<br>
<br>
- /** The immediate value. We currently only handle floats. */<br>
- float val;<br>
+ /** The immediate value */<br>
+ union {<br>
+ char bytes[8];<br>
+ float f;<br>
+ int32_t d;<br>
+ int16_t w;<br>
+ };<br>
+ uint8_t size;<br>
+<br>
+ /** When promoting half-float we need to account for certain restrictions */<br>
+ bool is_half_float;<br>
<br>
/**<br>
* The GRF register and subregister number where we've decided to store the<br>
@@ -145,10 +155,11 @@ struct table {<br>
};<br>
<br>
static struct imm *<br>
-find_imm(struct table *table, float val)<br>
+find_imm(struct table *table, void *data, uint8_t size)<br>
{<br>
for (int i = 0; i < table->len; i++) {<br>
- if (table->imm[i].val == val) {<br>
+ if (table->imm[i].size == size &&<br>
+ !memcmp(table->imm[i].bytes, data, size)) {<br>
return &table->imm[i];<br>
}<br>
}<br>
@@ -190,6 +201,96 @@ compare(const void *_a, const void *_b)<br>
return a->first_use_ip - b->first_use_ip;<br>
}<br>
<br>
+static bool<br>
+get_constant_value(const struct gen_device_info *devinfo,<br>
+ const fs_inst *inst, uint32_t src_idx,<br>
+ void *out, brw_reg_type *out_type)<br>
+{<br>
+ const bool can_do_source_mods = inst->can_do_source_mods(devinfo);<br>
+ const fs_reg *src = &inst->src[src_idx];<br>
+<br>
+ *out_type = src->type;<br>
+<br>
+ switch (*out_type) {<br>
+ case BRW_REGISTER_TYPE_F: {<br>
+ float val = !can_do_source_mods ? src->f : fabsf(src->f);<br>
+ memcpy(out, &val, 4);<br>
+ break;<br>
+ }<br>
+ case BRW_REGISTER_TYPE_HF: {<br>
+ uint16_t val = src->d & 0xffffu;<br>
+ if (can_do_source_mods)<br>
+ val = _mesa_float_to_half(fabsf(_mesa_half_to_float(val)));<br>
+ memcpy(out, &val, 2);<br>
+ break;<br>
+ }<br>
+ case BRW_REGISTER_TYPE_D: {<br>
+ int32_t val = !can_do_source_mods ? src->d : abs(src->d);<br>
+ memcpy(out, &val, 4);<br>
+ break;<br>
+ }<br>
+ case BRW_REGISTER_TYPE_UD:<br>
+ memcpy(out, &src->ud, 4);<br>
+ break;<br>
+ case BRW_REGISTER_TYPE_W: {<br>
+ int16_t val = src->d & 0xffffu;<br>
+ if (can_do_source_mods)<br>
+ val = abs(val);<br>
+ memcpy(out, &val, 2);<br>
+ break;<br>
+ }<br>
+ case BRW_REGISTER_TYPE_UW:<br>
+ memcpy(out, &src->ud, 2);<br>
+ break;<br></blockquote><div><br></div><div>You could also throw in DF and Q types. This is probably sufficient for now though.<br></div></div></div></blockquote><div><br></div><div>Sure, I was waiting to do that until I started enabling constant propagation of 64-bit types but there is no reason why we can't leave the pass prepared for that I guess. It seems that for platforms that don't support 64-bit types we should not be seeing 64-bit constants here, so I guess there is no risk.</div><div><br></div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><div> </div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex">
+ default:<br>
+ return false;<br>
+ };<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+static struct brw_reg<br>
+build_imm_reg_for_copy(struct imm *imm)<br>
+{<br>
+ switch (imm->size) {<br>
+ case 4:<br>
+ return brw_imm_d(imm->d);<br>
+ case 2:<br>
+ return brw_imm_w(imm->w);<br>
+ default:<br>
+ unreachable("not implemented");<br>
+ }<br>
+}<br>
+<br>
+static inline uint32_t<br>
+get_alignment_for_imm(const struct imm *imm)<br>
+{<br>
+ if (imm->is_half_float)<br>
+ return 4; /* At least MAD seems to require this */<br>
+ else<br>
+ return imm->size;<br>
+}<br>
+<br>
+static bool<br>
+needs_negate(const struct fs_reg *reg, const struct imm *imm)<br>
+{<br>
+ switch (reg->type) {<br>
+ case BRW_REGISTER_TYPE_F:<br>
+ return signbit(reg->f) != signbit(imm->f);<br>
+ case BRW_REGISTER_TYPE_D:<br>
+ return (reg->d < 0) != (imm->d < 0);<br>
+ case BRW_REGISTER_TYPE_HF:<br>
+ return (reg->d & 0x8000u) != (imm->w & 0x8000u);<br>
+ case BRW_REGISTER_TYPE_W:<br>
+ return ((reg->d & 0xffffu) < 0) != (imm->w < 0);<br>
+ case BRW_REGISTER_TYPE_UD:<br>
+ case BRW_REGISTER_TYPE_UW:<br>
+ return false;<br>
+ default:<br>
+ unreachable("not implemented");<br>
+ };<br>
+}<br>
+<br>
bool<br>
fs_visitor::opt_combine_constants()<br>
{<br>
@@ -214,13 +315,17 @@ fs_visitor::opt_combine_constants()<br>
continue;<br>
<br>
for (int i = 0; i < inst->sources; i++) {<br>
- if (inst->src[i].file != IMM ||<br>
- inst->src[i].type != BRW_REGISTER_TYPE_F)<br>
+ if (inst->src[i].file != IMM)<br>
continue;<br>
<br>
- float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :<br>
- fabs(inst->src[i].f);<br>
- struct imm *imm = find_imm(&table, val);<br>
+ char data[8];<br>
+ brw_reg_type type;<br>
+ if (!get_constant_value(devinfo, inst, i, data, &type))<br>
+ continue;<br>
+<br>
+ uint8_t size = type_sz(type);<br>
+<br>
+ struct imm *imm = find_imm(&table, data, size);<br>
<br>
if (imm) {<br>
bblock_t *intersection = cfg_t::intersect(block, imm->block);<br>
@@ -237,7 +342,9 @@ fs_visitor::opt_combine_constants()<br>
imm->inst = inst;<br>
imm->uses = new(const_ctx) exec_list();<br>
imm->uses->push_tail(link(const_ctx, &inst->src[i]));<br>
- imm->val = val;<br>
+ memcpy(imm->bytes, data, size);<br>
+ imm->size = size;<br>
+ imm->is_half_float = type == BRW_REGISTER_TYPE_HF;<br></blockquote><div><br></div><div>In the "if (imm)" case above, I think we want to do<br></div><div><br></div><div>if (type == BRW_REGISTER_TYPE_HF)</div><div> imm->is_half_float = true;</div></div><div class="gmail_quote"><br></div><div class="gmail_quote">in case we get a constant that's used first as a uint16_t (which won't set is_half_float) and then later as a float16_t.</div></div></blockquote><div><br></div><div>Yes, good catch.</div><div><br></div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><div> </div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex">
imm->uses_by_coissue = could_coissue(devinfo, inst);<br>
imm->must_promote = must_promote_imm(devinfo, inst);<br>
imm->first_use_ip = ip;<br>
@@ -276,17 +383,40 @@ fs_visitor::opt_combine_constants()<br>
*/<br>
exec_node *n = (imm->inst ? imm->inst :<br>
imm->block->last_non_control_flow_inst()->next);<br>
- const fs_builder ibld = <a href="http://bld.at" rel="noreferrer" target="_blank">bld.at</a>(imm->block, n).exec_all().group(1, 0);<br>
<br>
- ibld.MOV(reg, brw_imm_f(imm->val));<br>
- imm->nr = <a href="http://reg.nr" rel="noreferrer" target="_blank">reg.nr</a>;<br>
- imm->subreg_offset = reg.offset;<br>
+ /* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions:<br>
+ *<br>
+ * "In Align16 mode, the channel selects and channel enables apply to a<br>
+ * pair of half-floats, because these parameters are defined for DWord<br>
+ * elements ONLY. This is applicable when both source and destination<br>
+ * are half-floats."<br>
+ *<br>
+ * This means that Align16 instructions that use promoted HF immediates<br>
+ * and use a <0,1,0>:HF region would read 2 HF slots instead of<br>
+ * replicating the single one we want. To avoid this, we always populate<br>
+ * both HF slots within a DWord with the constant.<br>
+ */<br>
+ const uint32_t width = devinfo->gen == 8 && imm->is_half_float ? 2 : 1;<br>
+ const fs_builder ibld = <a href="http://bld.at" rel="noreferrer" target="_blank">bld.at</a>(imm->block, n).exec_all().group(width, 0);<br>
+<br>
+ /* Put the immediate in an offset aligned to its size. Some instructions<br>
+ * seem to have additional alignment requirements, so account for that<br>
+ * too.<br>
+ */<br>
+ reg.offset = ALIGN(reg.offset, get_alignment_for_imm(imm));<br>
<br>
- reg.offset += sizeof(float);<br>
- if (reg.offset == 8 * sizeof(float)) {<br>
+ /* Ensure we have enough space in the register to copy the immediate */<br>
+ struct brw_reg imm_reg = build_imm_reg_for_copy(imm);<br>
+ if (reg.offset + type_sz(imm_reg.type) > REG_SIZE) {<br></blockquote><div><br></div><div>I think you need to multiply by width here as well.<br></div><div> </div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex">
<a href="http://reg.nr" rel="noreferrer" target="_blank">reg.nr</a> = alloc.allocate(1);<br>
reg.offset = 0;<br>
}<br>
+<br>
+ ibld.MOV(retype(reg, imm_reg.type), imm_reg);<br>
+ imm->nr = <a href="http://reg.nr" rel="noreferrer" target="_blank">reg.nr</a>;<br>
+ imm->subreg_offset = reg.offset;<br>
+<br>
+ reg.offset += imm->size;<br></blockquote><div><br></div><div>Multiply by width.</div></div></div></blockquote><div><br></div><div>Yes.</div><div><br></div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><div>You can ignore the comment about DF and Q types if you'd like. With all the others addressed, this is</div><div><br></div><div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br></div><div> </div><blockquote type="cite" style="margin:0 0 0 .8ex; border-left:2px #729fcf solid;padding-left:1ex">
}<br>
promoted_constants = table.len;<br>
<br>
@@ -294,13 +424,41 @@ fs_visitor::opt_combine_constants()<br>
for (int i = 0; i < table.len; i++) {<br>
foreach_list_typed(reg_link, link, link, table.imm[i].uses) {<br>
fs_reg *reg = link->reg;<br>
- assert((isnan(reg->f) && isnan(table.imm[i].val)) ||<br>
- fabsf(reg->f) == fabs(table.imm[i].val));<br>
+#ifdef DEBUG<br>
+ switch (reg->type) {<br>
+ case BRW_REGISTER_TYPE_F:<br>
+ assert((isnan(reg->f) && isnan(table.imm[i].f)) ||<br>
+ (fabsf(reg->f) == fabsf(table.imm[i].f)));<br>
+ break;<br>
+ case BRW_REGISTER_TYPE_HF:<br>
+ assert((isnan(_mesa_half_to_float(reg->d & 0xffffu)) &&<br>
+ isnan(_mesa_half_to_float(table.imm[i].w))) ||<br>
+ (fabsf(_mesa_half_to_float(reg->d & 0xffffu)) ==<br>
+ fabsf(_mesa_half_to_float(table.imm[i].w))));<br>
+ break;<br>
+ case BRW_REGISTER_TYPE_D:<br>
+ assert(reg->type == BRW_REGISTER_TYPE_D &&<br>
+ abs(reg->d) == abs(table.imm[i].d));<br>
+ break;<br>
+ case BRW_REGISTER_TYPE_UD:<br>
+ assert(reg->d == table.imm[i].d);<br>
+ break;<br>
+ case BRW_REGISTER_TYPE_W:<br>
+ assert(abs((int16_t) (reg->d & 0xffff)) == table.imm[i].w);<br>
+ break;<br>
+ case BRW_REGISTER_TYPE_UW:<br>
+ assert(reg->type == BRW_REGISTER_TYPE_UW &&<br>
+ (reg->ud & 0xffffu) == (uint16_t) table.imm[i].w);<br>
+ break;<br>
+ default:<br>
+ break;<br>
+ }<br>
+#endif<br>
<br>
reg->file = VGRF;<br>
reg->offset = table.imm[i].subreg_offset;<br>
reg->stride = 0;<br>
- reg->negate = signbit(reg->f) != signbit(table.imm[i].val);<br>
+ reg->negate = needs_negate(reg, &table.imm[i]);<br>
reg->nr = table.imm[i].nr;<br>
}<br>
}<br>
@@ -309,9 +467,9 @@ fs_visitor::opt_combine_constants()<br>
for (int i = 0; i < table.len; i++) {<br>
struct imm *imm = &table.imm[i];<br>
<br>
- printf("%.3fF - block %3d, reg %3d sub %2d, Uses: (%2d, %2d), "<br>
- "IP: %4d to %4d, length %4d\n",<br>
- imm->val,<br>
+ printf("0x%016" PRIx64 " - block %3d, reg %3d sub %2d, "<br>
+ "Uses: (%2d, %2d), IP: %4d to %4d, length %4d\n",<br>
+ (uint64_t)(imm->d & BITFIELD64_MASK(imm->size * 8)),<br>
imm->block->num,<br>
imm->nr,<br>
imm->subreg_offset,<br>
</blockquote></div></div></blockquote></body></html>