<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga <<a href="mailto:itoral@igalia.com">itoral@igalia.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Reviewed-by: Topi Pohjolainen <<a href="mailto:topi.pohjolainen@intel.com" target="_blank">topi.pohjolainen@intel.com</a>><br>
---<br>
.../compiler/brw_fs_combine_constants.cpp | 60 +++++++++++++++----<br>
1 file changed, 49 insertions(+), 11 deletions(-)<br>
<br>
diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp<br>
index 7343f77bb45..54017e5668b 100644<br>
--- a/src/intel/compiler/brw_fs_combine_constants.cpp<br>
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp<br>
@@ -36,6 +36,7 @@<br>
<br>
#include "brw_fs.h"<br>
#include "brw_cfg.h"<br>
+#include "util/half_float.h"<br>
<br>
using namespace brw;<br>
<br>
@@ -114,8 +115,9 @@ struct imm {<br>
*/<br>
exec_list *uses;<br>
<br>
- /** The immediate value. We currently only handle floats. */<br>
+ /** The immediate value. We currently only handle float and half-float. */<br>
float val;<br>
+ brw_reg_type type;<br></blockquote><div><br></div><div>I had a brief chat with Matt today and I think that this may be going in the wrong direction. In particular, I'd like us to eventually (maybe we can do it now?) generalize the combine_constants pass to more data types; in particular, integers. I recently came across a shader where the fact that we couldn't do combine_constants on integers was causing significant register pressure problems and spilling. (The test was doing a bunch of BFI2/BFE with constant sources.) It could also be a huge win for 8-bit and 64-bit where we can't put immediates in regular 2-src instructions.</div><div><br></div><div>What does this mean for the pass? I suspect we want a bit size instead of a type and a simple char[8] for the data and just make it a blob of bits. We may also want some sort of heuristic so we don't burn constant table space for things that are only used once or maybe even twice.</div><div><br></div><div>Normally, I would say "do it as a fixup" but if we go the direction of having a float and using _mesa_half_to_float and _mesa_float_to_half, I suspect it'll be harder to go for the bag-of-bits approach.<br></div><div><br></div><div>Thoughts?</div><div><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
/**<br>
* The GRF register and subregister number where we've decided to store the<br>
@@ -145,10 +147,10 @@ struct table {<br>
};<br>
<br>
static struct imm *<br>
-find_imm(struct table *table, float val)<br>
+find_imm(struct table *table, float val, brw_reg_type type)<br>
{<br>
for (int i = 0; i < table->len; i++) {<br>
- if (table->imm[i].val == val) {<br>
+ if (table->imm[i].val == val && table->imm[i].type == type) {<br>
return &table->imm[i];<br>
}<br>
}<br>
@@ -190,6 +192,20 @@ compare(const void *_a, const void *_b)<br>
return a->first_use_ip - b->first_use_ip;<br>
}<br>
<br>
+static bool<br>
+needs_negate(float reg_val, float imm_val, brw_reg_type type)<br>
+{<br>
+ /* reg_val represents the immediate value in the register in its original<br>
+ * bit-size, while imm_val is always a valid 32-bit float value.<br>
+ */<br>
+ if (type == BRW_REGISTER_TYPE_HF) {<br>
+ uint32_t reg_val_ud = *((uint32_t *) ®_val);<br>
+ reg_val = _mesa_half_to_float(reg_val_ud & 0xffff);<br>
+ }<br>
+<br>
+ return signbit(imm_val) != signbit(reg_val);<br>
+}<br>
+<br>
bool<br>
fs_visitor::opt_combine_constants()<br>
{<br>
@@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants()<br>
<br>
for (int i = 0; i < inst->sources; i++) {<br>
if (inst->src[i].file != IMM ||<br>
- inst->src[i].type != BRW_REGISTER_TYPE_F)<br>
+ (inst->src[i].type != BRW_REGISTER_TYPE_F &&<br>
+ inst->src[i].type != BRW_REGISTER_TYPE_HF))<br>
continue;<br>
<br>
- float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :<br>
- fabs(inst->src[i].f);<br>
- struct imm *imm = find_imm(&table, val);<br>
+ float val;<br>
+ if (inst->src[i].type == BRW_REGISTER_TYPE_F) {<br>
+ val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :<br>
+ fabs(inst->src[i].f);<br>
+ } else {<br>
+ val = !inst->can_do_source_mods(devinfo) ?<br>
+ _mesa_half_to_float(inst->src[i].d & 0xffff) :<br>
+ fabs(_mesa_half_to_float(inst->src[i].d & 0xffff));<br>
+ }<br>
+ struct imm *imm = find_imm(&table, val, inst->src[i].type);<br>
<br>
if (imm) {<br>
bblock_t *intersection = cfg_t::intersect(block, imm->block);<br>
@@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants()<br>
imm->uses = new(const_ctx) exec_list();<br>
imm->uses->push_tail(link(const_ctx, &inst->src[i]));<br>
imm->val = val;<br>
+ imm->type = inst->src[i].type;<br>
imm->uses_by_coissue = could_coissue(devinfo, inst);<br>
imm->must_promote = must_promote_imm(devinfo, inst);<br>
imm->first_use_ip = ip;<br>
@@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants()<br>
imm->block->last_non_control_flow_inst()->next);<br>
const fs_builder ibld = <a href="http://bld.at" rel="noreferrer" target="_blank">bld.at</a>(imm->block, n).exec_all().group(1, 0);<br>
<br>
- ibld.MOV(reg, brw_imm_f(imm->val));<br>
+ reg = retype(reg, imm->type);<br>
+ if (imm->type == BRW_REGISTER_TYPE_F) {<br>
+ ibld.MOV(reg, brw_imm_f(imm->val));<br>
+ } else {<br>
+ const uint16_t val_hf = _mesa_float_to_half(imm->val);<br>
+ ibld.MOV(reg, retype(brw_imm_uw(val_hf), BRW_REGISTER_TYPE_HF));<br>
+ }<br>
imm->nr = <a href="http://reg.nr" rel="noreferrer" target="_blank">reg.nr</a>;<br>
imm->subreg_offset = reg.offset;<br>
<br>
+ /* Keep offsets 32-bit aligned since we are mixing 32-bit and 16-bit<br>
+ * constants into the same register<br>
+ *<br>
+ * TODO: try to pack pairs of HF constants into each 32-bit slot<br>
+ */<br>
reg.offset += sizeof(float);<br>
- if (reg.offset == 8 * sizeof(float)) {<br>
+ if (reg.offset == REG_SIZE) {<br>
<a href="http://reg.nr" rel="noreferrer" target="_blank">reg.nr</a> = alloc.allocate(1);<br>
reg.offset = 0;<br>
}<br>
@@ -295,12 +331,14 @@ fs_visitor::opt_combine_constants()<br>
foreach_list_typed(reg_link, link, link, table.imm[i].uses) {<br>
fs_reg *reg = link->reg;<br>
assert((isnan(reg->f) && isnan(table.imm[i].val)) ||<br>
- fabsf(reg->f) == fabs(table.imm[i].val));<br>
+ fabsf(reg->f) == fabs(table.imm[i].val) ||<br>
+ table.imm[i].type == BRW_REGISTER_TYPE_HF);<br>
<br>
reg->file = VGRF;<br>
+ reg->type = table.imm[i].type;<br>
reg->offset = table.imm[i].subreg_offset;<br>
reg->stride = 0;<br>
- reg->negate = signbit(reg->f) != signbit(table.imm[i].val);<br>
+ reg->negate = needs_negate(reg->f, table.imm[i].val, table.imm[i].type);<br>
reg->nr = table.imm[i].nr;<br>
}<br>
}<br>
-- <br>
2.17.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div>