[Mesa-dev] [PATCH 2/4] i965/fs/skl+: Prepare LOD-zero optimization for sample_lz.
Matt Turner
mattst88 at gmail.com
Wed May 4 06:04:32 UTC 2016
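The next patch will add support for recognizing sample_lz, whose LOD
argument is in a different location in the payload than it is for ld_lz.
This patch generalizes the LOD-zero optimization (opt_ld_lz) to take the
position of the LOD argument as a variable (lod_offset) instead of
hard-coding it, and renames the function to opt_sample_lz because
sample_lz is a much more important thing to recognize.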
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 58 ++++++++++++++++++++++--------------
src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
2 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 15df298..dc2af66 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2483,30 +2483,35 @@ fs_visitor::opt_zero_samples()
return progress;
}
-static bool
-lod_source_is_zero(const fs_inst *send_inst)
+static fs_inst *
+lod_source_is_zero(const fs_inst *send_inst, int lod_offset)
{
int reg_offset = send_inst->exec_size / 8 * 2 + send_inst->header_size;
const fs_reg src = byte_offset(send_inst->src[0], reg_offset * 32);
/* Look for the last instruction that writes to the source */
- foreach_inst_in_block_reverse_starting_from(const fs_inst, inst, send_inst) {
- if (inst->overwrites_reg(src)) {
- return (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
- inst->src[inst->header_size + 2].is_zero());
+ foreach_inst_in_block_reverse_starting_from(fs_inst, inst, send_inst) {
+ if (inst->overwrites_reg(src) &&
+ inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
+ inst->src[inst->header_size + lod_offset].is_zero()) {
+ return inst;
}
}
- return false;
+ return NULL;
}
/**
- * Replace LD sample messages that have a zero LOD with LD_LZ. This
- * instruction is available since Gen9. It would help for doing texelFetch
- * when passing three coordinates because then the LOD can be skipped.
+ * Replace sample messages that have a zero LOD with the special _LZ messages.
+ * These instructions are available since Gen9. The available _LZ messages
+ * are:
+ *
+ * - ld_lz
+ * - sample_lz
+ * - sample_c_lz
*/
bool
-fs_visitor::opt_ld_lz()
+fs_visitor::opt_sample_lz()
{
if (devinfo->gen < 9)
return false;
@@ -2520,20 +2525,27 @@ fs_visitor::opt_ld_lz()
/* If the LOD parameter is not sent or is a constant zero then we can
* change the instruction.
*/
+ int lod_offset;
+ if (inst->opcode == SHADER_OPCODE_TXF) {
+ lod_offset = 2;
+ } else {
+ unreachable("not reached");
+ }
bool lod_included = (inst->mlen - inst->header_size >=
- inst->exec_size / 8 * 3);
- if (lod_included && !lod_source_is_zero(inst))
- continue;
+ inst->exec_size / 8 * (lod_offset + 1));
- inst->opcode = SHADER_OPCODE_TXF_LZ;
+ if (lod_included && inst->mlen - inst->header_size > inst->exec_size / 8) {
+ fs_inst *load_payload = lod_source_is_zero(inst, lod_offset);
+ if (!load_payload)
+ continue;
- if (lod_included) {
inst->mlen -= inst->exec_size / 8;
- /* If the r coordinate is included then we need a new LOAD_PAYLOAD
- * instruction which has it in the right place.
+ /* If the lod is included then we need a new LOAD_PAYLOAD instruction
+ * which has the arguments that follow it in the right places.
*/
- if (inst->mlen - inst->header_size >= inst->exec_size / 8 * 3) {
+ if (inst->mlen - inst->header_size >=
+ inst->exec_size / 8 * (lod_offset + 1)) {
const fs_builder ibld(this, block, inst);
fs_reg send_header = fs_reg(VGRF, alloc.allocate(inst->mlen),
BRW_REGISTER_TYPE_F);
@@ -2544,11 +2556,11 @@ fs_visitor::opt_ld_lz()
for (int i = 0; i < n_sources; i++) {
int j;
- if (i >= inst->header_size + 2)
+ if (i >= inst->header_size + lod_offset)
j = i + 1;
else
j = i;
- new_sources[i] = offset(inst->src[0], ibld, j);
+ new_sources[i] = load_payload->src[j];
}
/* The LOAD_PAYLOAD helper is not used for the same reasons given
@@ -2568,6 +2580,8 @@ fs_visitor::opt_ld_lz()
}
}
+ inst->opcode = SHADER_OPCODE_TXF_LZ;
+
progress = true;
}
@@ -5425,7 +5439,7 @@ fs_visitor::optimize()
OPT(opt_redundant_discard_jumps);
OPT(opt_saturate_propagation);
OPT(opt_zero_samples);
- OPT(opt_ld_lz);
+ OPT(opt_sample_lz);
OPT(register_coalesce);
OPT(compute_to_mrf);
OPT(eliminate_find_live_channel);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 66b39dc..9c01709 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -227,7 +227,7 @@ public:
bool opt_saturate_propagation();
bool opt_cmod_propagation();
bool opt_zero_samples();
- bool opt_ld_lz();
+ bool opt_sample_lz();
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
uint32_t spill_offset, int count);
void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
--
2.7.3
More information about the mesa-dev
mailing list