[Mesa-dev] [PATCH 07/25] i965/fs: Implement opt_sampler_eot() in terms of logical sends.
Jason Ekstrand
jason at jlekstrand.net
Sat May 28 14:46:01 UTC 2016
On Fri, May 27, 2016 at 7:05 PM, Francisco Jerez <currojerez at riseup.net>
wrote:
> This makes the whole LOAD_PAYLOAD munging unnecessary which simplifies
> the code and will allow the optimization to succeed in more cases
> independent of whether the LOAD_PAYLOAD instruction can be found or
> not.
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 79
> +++++++++++-------------------------
> 1 file changed, 23 insertions(+), 56 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 45c4753..6abf776 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -2612,11 +2612,15 @@ fs_visitor::opt_sampler_eot()
> if (key->nr_color_regions != 1)
> return false;
>
> + /* Requires clamping the color payload in the shader. */
> + if (key->clamp_fragment_color)
> + return false;
>
This looks new... Is it actually needed? Could we simply do "if
(tex_inst->saturate) return false;"? In any case, it should probably be
it's own commit.
> +
> /* Look for a texturing instruction immediately before the final
> FB_WRITE. */
> bblock_t *block = cfg->blocks[cfg->num_blocks - 1];
> fs_inst *fb_write = (fs_inst *)block->end();
> assert(fb_write->eot);
> - assert(fb_write->opcode == FS_OPCODE_FB_WRITE);
> + assert(fb_write->opcode == FS_OPCODE_FB_WRITE_LOGICAL);
>
> /* There wasn't one; nothing to do. */
> if (unlikely(fb_write->prev->is_head_sentinel()))
> @@ -2629,24 +2633,20 @@ fs_visitor::opt_sampler_eot()
> * “Response Length of zero is allowed on all SIMD8* and SIMD16*
> sampler
> * messages except sample+killpix, resinfo, sampleinfo, LOD, and
> gather4*”
> */
> - if (!tex_inst->is_tex() ||
> - tex_inst->opcode == SHADER_OPCODE_TXS ||
> - tex_inst->opcode == SHADER_OPCODE_SAMPLEINFO ||
> - tex_inst->opcode == SHADER_OPCODE_LOD ||
> - tex_inst->opcode == SHADER_OPCODE_TG4 ||
> - tex_inst->opcode == SHADER_OPCODE_TG4_OFFSET)
> + if (tex_inst->opcode != SHADER_OPCODE_TEX_LOGICAL &&
> + tex_inst->opcode != SHADER_OPCODE_TXD_LOGICAL &&
> + tex_inst->opcode != SHADER_OPCODE_TXF_LOGICAL &&
> + tex_inst->opcode != SHADER_OPCODE_TXL_LOGICAL &&
> + tex_inst->opcode != FS_OPCODE_TXB_LOGICAL &&
> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL &&
> + tex_inst->opcode != SHADER_OPCODE_TXF_CMS_W_LOGICAL &&
> + tex_inst->opcode != SHADER_OPCODE_TXF_UMS_LOGICAL)
> return false;
>
Same here. This isn't just a rename, it's a switch from listing invalid
cases to explicitly listing valid ones.
Other than those two comments, I really like this. It's a nice cleanup.
>
> - /* If there's no header present, we need to munge the LOAD_PAYLOAD as
> well.
> - * It's very likely to be the previous instruction.
> - */
> + /* XXX - This shouldn't be necessary. */
> if (tex_inst->prev->is_head_sentinel())
> return false;
>
> - fs_inst *load_payload = (fs_inst *) tex_inst->prev;
> - if (load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
> - return false;
> -
> assert(!tex_inst->eot); /* We can't get here twice */
> assert((tex_inst->offset & (0xff << 24)) == 0);
>
> @@ -2658,46 +2658,10 @@ fs_visitor::opt_sampler_eot()
> tex_inst->regs_written = 0;
> fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
>
> - /* If a header is present, marking the eot is sufficient. Otherwise,
> we need
> - * to create a new LOAD_PAYLOAD command with the same sources and a
> space
> - * saved for the header. Using a new destination register not only
> makes sure
> - * we have enough space, but it will make sure the dead code
> eliminator kills
> - * the instruction that this will replace.
> + /* Marking EOT is sufficient, lower_logical_sends() will notice the EOT
> + * flag and submit a header together with the sampler message as
> required
> + * by the hardware.
> */
> - if (tex_inst->header_size != 0) {
> - invalidate_live_intervals();
> - return true;
> - }
> -
> - fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,
> - load_payload->sources + 1);
> - fs_reg *new_sources =
> - ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1);
> -
> - new_sources[0] = fs_reg();
> - for (int i = 0; i < load_payload->sources; i++)
> - new_sources[i+1] = load_payload->src[i];
> -
> - /* The LOAD_PAYLOAD helper seems like the obvious choice here.
> However, it
> - * requires a lot of information about the sources to appropriately
> figure
> - * out the number of registers needed to be used. Given this stage in
> our
> - * optimization, we may not have the appropriate GRFs required by
> - * LOAD_PAYLOAD at this point (copy propagation). Therefore, we need to
> - * manually emit the instruction.
> - */
> - fs_inst *new_load_payload = new(mem_ctx)
> fs_inst(SHADER_OPCODE_LOAD_PAYLOAD,
> -
> load_payload->exec_size,
> - send_header,
> - new_sources,
> - load_payload->sources
> + 1);
> -
> - new_load_payload->regs_written = load_payload->regs_written + 1;
> - new_load_payload->header_size = 1;
> - tex_inst->mlen++;
> - tex_inst->header_size = 1;
> - tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1],
> new_load_payload);
> - tex_inst->src[0] = send_header;
> -
> invalidate_live_intervals();
> return true;
> }
> @@ -4153,7 +4117,7 @@ lower_sampler_logical_send_gen7(const fs_builder
> &bld, fs_inst *inst, opcode op,
> sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);
>
> if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||
> - offset_value.file != BAD_FILE ||
> + offset_value.file != BAD_FILE || inst->eot ||
> op == SHADER_OPCODE_SAMPLEINFO ||
> is_high_sampler(devinfo, sampler)) {
> /* For general texture offsets (no txf workaround), we need a
> header to
> @@ -4174,7 +4138,7 @@ lower_sampler_logical_send_gen7(const fs_builder
> &bld, fs_inst *inst, opcode op,
> * and we have an explicit header, we need to set up the sampler
> * writemask. It's reversed from normal: 1 means "don't write".
> */
> - if (inst->regs_written != 4 * reg_width) {
> + if (!inst->eot && inst->regs_written != 4 * reg_width) {
> assert((inst->regs_written % reg_width) == 0);
> unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) &
> 0xf;
> inst->offset |= mask << 12;
> @@ -5738,6 +5702,10 @@ fs_visitor::optimize()
> pass_num = 0;
>
> OPT(lower_simd_width);
> +
> + /* After SIMD lowering just in case we had to unroll the EOT send. */
> + OPT(opt_sampler_eot);
> +
> OPT(lower_logical_sends);
>
> if (progress) {
> @@ -5761,7 +5729,6 @@ fs_visitor::optimize()
> }
>
> OPT(opt_redundant_discard_jumps);
> - OPT(opt_sampler_eot);
>
> if (OPT(lower_load_payload)) {
> split_virtual_grfs();
> --
> 2.7.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160528/4575b182/attachment-0001.html>
More information about the mesa-dev
mailing list