<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, May 27, 2016 at 7:05 PM, Francisco Jerez <span dir="ltr"><<a href="mailto:currojerez@riseup.net" target="_blank">currojerez@riseup.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">This makes the whole LOAD_PAYLOAD munging unnecessary which simplifies<br>
the code and will allow the optimization to succeed in more cases<br>
independent of whether the LOAD_PAYLOAD instruction can be found or<br>
not.<br>
---<br>
src/mesa/drivers/dri/i965/brw_fs.cpp | 79 +++++++++++-------------------------<br>
1 file changed, 23 insertions(+), 56 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index 45c4753..6abf776 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -2612,11 +2612,15 @@ fs_visitor::opt_sampler_eot()<br>
if (key->nr_color_regions != 1)<br>
return false;<br>
<br>
+ /* Requires clamping the color payload in the shader. */<br>
+ if (key->clamp_fragment_color)<br>
+ return false;<br></blockquote><div><br></div><div>This looks new... Is it actually needed? Could we simply do "if (tex_inst->saturate) return false;"? In any case, it should probably be it's own commit.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
/* Look for a texturing instruction immediately before the final FB_WRITE. */<br>
bblock_t *block = cfg->blocks[cfg->num_blocks - 1];<br>
fs_inst *fb_write = (fs_inst *)block->end();<br>
assert(fb_write->eot);<br>
- assert(fb_write->opcode == FS_OPCODE_FB_WRITE);<br>
+ assert(fb_write->opcode == FS_OPCODE_FB_WRITE_LOGICAL);<br>
<br>
/* There wasn't one; nothing to do. */<br>
if (unlikely(fb_write->prev->is_head_sentinel()))<br>
@@ -2629,24 +2633,20 @@ fs_visitor::opt_sampler_eot()<br>
* “Response Length of zero is allowed on all SIMD8* and SIMD16* sampler<br>
* messages except sample+killpix, resinfo, sampleinfo, LOD, and gather4*”<br>
*/<br>
- if (!tex_inst->is_tex() ||<br>
- tex_inst->opcode == SHADER_OPCODE_TXS ||<br>
- tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO ||<br>
- tex_inst->opcode == SHADER_OPCODE_LOD ||<br>
- tex_inst->opcode == SHADER_OPCODE_TG4 ||<br>
- tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET)<br>
+ if (tex_inst->opcode != SHADER_OPCODE_TEX_LOGICAL &&<br>
+ tex_inst->opcode != SHADER_OPCODE_TXD_LOGICAL &&<br>
+ tex_inst->opcode != SHADER_OPCODE_TXF_LOGICAL &&<br>
+ tex_inst->opcode != SHADER_OPCODE_TXL_LOGICAL &&<br>
+ tex_inst->opcode != FS_OPCODE_TXB_LOGICAL &&<br>
+ tex_inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL &&<br>
+ tex_inst->opcode != SHADER_OPCODE_TXF_CMS_W_LOGICAL &&<br>
+ tex_inst->opcode != SHADER_OPCODE_TXF_UMS_LOGICAL)<br>
return false;<br></blockquote><div><br></div><div>Same here. This isn't just a rename, it's a switch from listing invalid cases to explicitly listing valid ones.<br><br></div><div>Other than those two comments, I really like this. It's a nice cleanup.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
- /* If there's no header present, we need to munge the LOAD_PAYLOAD as well.<br>
- * It's very likely to be the previous instruction.<br>
- */<br>
+ /* XXX - This shouldn't be necessary. */<br>
if (tex_inst->prev->is_head_sentinel())<br>
return false;<br>
<br>
- fs_inst *load_payload = (fs_inst *) tex_inst->prev;<br>
- if (load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)<br>
- return false;<br>
-<br>
assert(!tex_inst->eot); /* We can't get here twice */<br>
assert((tex_inst->offset & (0xff << 24)) == 0);<br>
<br>
@@ -2658,46 +2658,10 @@ fs_visitor::opt_sampler_eot()<br>
tex_inst->regs_written = 0;<br>
fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);<br>
<br>
- /* If a header is present, marking the eot is sufficient. Otherwise, we need<br>
- * to create a new LOAD_PAYLOAD command with the same sources and a space<br>
- * saved for the header. Using a new destination register not only makes sure<br>
- * we have enough space, but it will make sure the dead code eliminator kills<br>
- * the instruction that this will replace.<br>
+ /* Marking EOT is sufficient, lower_logical_sends() will notice the EOT<br>
+ * flag and submit a header together with the sampler message as required<br>
+ * by the hardware.<br>
*/<br>
- if (tex_inst->header_size != 0) {<br>
- invalidate_live_intervals();<br>
- return true;<br>
- }<br>
-<br>
- fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,<br>
- load_payload->sources + 1);<br>
- fs_reg *new_sources =<br>
- ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1);<br>
-<br>
- new_sources[0] = fs_reg();<br>
- for (int i = 0; i < load_payload->sources; i++)<br>
- new_sources[i+1] = load_payload->src[i];<br>
-<br>
- /* The LOAD_PAYLOAD helper seems like the obvious choice here. However, it<br>
- * requires a lot of information about the sources to appropriately figure<br>
- * out the number of registers needed to be used. Given this stage in our<br>
- * optimization, we may not have the appropriate GRFs required by<br>
- * LOAD_PAYLOAD at this point (copy propagation). Therefore, we need to<br>
- * manually emit the instruction.<br>
- */<br>
- fs_inst *new_load_payload = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD,<br>
- load_payload->exec_size,<br>
- send_header,<br>
- new_sources,<br>
- load_payload->sources + 1);<br>
-<br>
- new_load_payload->regs_written = load_payload->regs_written + 1;<br>
- new_load_payload->header_size = 1;<br>
- tex_inst->mlen++;<br>
- tex_inst->header_size = 1;<br>
- tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], new_load_payload);<br>
- tex_inst->src[0] = send_header;<br>
-<br>
invalidate_live_intervals();<br>
return true;<br>
}<br>
@@ -4153,7 +4117,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,<br>
sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);<br>
<br>
if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||<br>
- offset_value.file != BAD_FILE ||<br>
+ offset_value.file != BAD_FILE || inst->eot ||<br>
op == SHADER_OPCODE_SAMPLEINFO ||<br>
is_high_sampler(devinfo, sampler)) {<br>
/* For general texture offsets (no txf workaround), we need a header to<br>
@@ -4174,7 +4138,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,<br>
* and we have an explicit header, we need to set up the sampler<br>
* writemask. It's reversed from normal: 1 means "don't write".<br>
*/<br>
- if (inst->regs_written != 4 * reg_width) {<br>
+ if (!inst->eot && inst->regs_written != 4 * reg_width) {<br>
assert((inst->regs_written % reg_width) == 0);<br>
unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf;<br>
inst->offset |= mask << 12;<br>
@@ -5738,6 +5702,10 @@ fs_visitor::optimize()<br>
pass_num = 0;<br>
<br>
OPT(lower_simd_width);<br>
+<br>
+ /* After SIMD lowering just in case we had to unroll the EOT send. */<br>
+ OPT(opt_sampler_eot);<br>
+<br>
OPT(lower_logical_sends);<br>
<br>
if (progress) {<br>
@@ -5761,7 +5729,6 @@ fs_visitor::optimize()<br>
}<br>
<br>
OPT(opt_redundant_discard_jumps);<br>
- OPT(opt_sampler_eot);<br>
<br>
if (OPT(lower_load_payload)) {<br>
split_virtual_grfs();<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.7.3<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>