[Mesa-dev] [PATCH 1/4] i965/fs/skl+: Use ld_lz when the LOD parameter is zero
Matt Turner
mattst88 at gmail.com
Wed May 4 06:04:31 UTC 2016
From: Neil Roberts <neil at linux.intel.com>
Adds an optimisation pass which recognises the LD sampler message type
when the LOD parameter is either a constant zero or not given, and
replaces it with the LD_LZ message type. LD_LZ behaves the same as LD,
except that the LOD is hardcoded to zero and so doesn't need to be in
the message. This can benefit shaders using texelFetch with 3
coordinates: with plain LD the LOD parameter can't be optimised out,
because it comes before the r coordinate.
[mattst88]: Does not affect anything in shader-db.
Reviewed-by: Matt Turner <mattst88 at gmail.com>
---
src/mesa/drivers/dri/i965/brw_defines.h | 2 +
src/mesa/drivers/dri/i965/brw_disasm.c | 1 +
src/mesa/drivers/dri/i965/brw_fs.cpp | 97 ++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 ++
.../drivers/dri/i965/brw_schedule_instructions.cpp | 1 +
src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +
7 files changed, 110 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 60b696c..e23f372 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -977,6 +977,7 @@ enum opcode {
SHADER_OPCODE_TXD_LOGICAL,
SHADER_OPCODE_TXF,
SHADER_OPCODE_TXF_LOGICAL,
+ SHADER_OPCODE_TXF_LZ,
SHADER_OPCODE_TXL,
SHADER_OPCODE_TXL_LOGICAL,
SHADER_OPCODE_TXS,
@@ -1636,6 +1637,7 @@ enum brw_message_target {
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17
#define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
+#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ 26
#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W 28
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 1778419..046e1b8 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -550,6 +550,7 @@ static const char *const gen5_sampler_msg_type[] = {
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po",
[GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
[HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
+ [GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ] = "ld_lz",
[GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs",
[GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms",
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 18760dd..15df298 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -949,6 +949,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXF_LZ:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_MCS:
@@ -2482,6 +2483,100 @@ fs_visitor::opt_zero_samples()
return progress;
}
+static bool
+lod_source_is_zero(const fs_inst *send_inst)
+{
+ int reg_offset = send_inst->exec_size / 8 * 2 + send_inst->header_size;
+ const fs_reg src = byte_offset(send_inst->src[0], reg_offset * 32);
+
+ /* Walk back through the block to the latest write of the LOD source */
+ foreach_inst_in_block_reverse_starting_from(const fs_inst, inst, send_inst) {
+ if (inst->overwrites_reg(src)) {
+ return (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
+ inst->src[inst->header_size + 2].is_zero());
+ }
+ }
+
+ return false;
+}
+
+/**
+ * Replace LD sampler messages whose LOD is zero (or absent) with LD_LZ,
+ * which is only used on Gen9 and later. This helps texelFetch with three
+ * coordinates, because the LOD no longer has to be sent ahead of r.
+ */
+bool
+fs_visitor::opt_ld_lz()
+{
+ if (devinfo->gen < 9)
+ return false;
+
+ bool progress = false;
+
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ if (inst->opcode != SHADER_OPCODE_TXF)
+ continue;
+
+ /* The message can be converted when the LOD parameter is either not
+ * part of the payload or is known to be a constant zero.
+ */
+ bool lod_included = (inst->mlen - inst->header_size >=
+ inst->exec_size / 8 * 3);
+ if (lod_included && !lod_source_is_zero(inst))
+ continue;
+
+ inst->opcode = SHADER_OPCODE_TXF_LZ;
+
+ if (lod_included) {
+ inst->mlen -= inst->exec_size / 8;
+
+ /* If the r coordinate follows the LOD, build a new LOAD_PAYLOAD that
+ * omits the LOD slot so that r moves up into its place.
+ */
+ if (inst->mlen - inst->header_size >= inst->exec_size / 8 * 3) {
+ const fs_builder ibld(this, block, inst);
+ fs_reg send_header = fs_reg(VGRF, alloc.allocate(inst->mlen),
+ BRW_REGISTER_TYPE_F);
+ int n_sources = ((inst->mlen - inst->header_size) *
+ 8 / inst->exec_size +
+ inst->header_size);
+ fs_reg *new_sources = ralloc_array(mem_ctx, fs_reg, n_sources);
+
+ for (int i = 0; i < n_sources; i++) {
+ int j;
+ if (i >= inst->header_size + 2)
+ j = i + 1;
+ else
+ j = i;
+ new_sources[i] = offset(inst->src[0], ibld, j);
+ }
+
+ /* The LOAD_PAYLOAD helper is not used for the same reasons given
+ * in fs_visitor::opt_sample_eot.
+ */
+ fs_inst *new_load_payload =
+ new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD,
+ inst->exec_size,
+ send_header,
+ new_sources,
+ n_sources);
+
+ new_load_payload->regs_written = inst->mlen;
+ new_load_payload->header_size = inst->header_size;
+ inst->insert_before(block, new_load_payload);
+ inst->src[0] = send_header;
+ }
+ }
+
+ progress = true;
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
/**
* Optimize sample messages which are followed by the final RT write.
*
@@ -4156,6 +4251,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
coordinate_done = true;
break;
+
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
@@ -5329,6 +5425,7 @@ fs_visitor::optimize()
OPT(opt_redundant_discard_jumps);
OPT(opt_saturate_propagation);
OPT(opt_zero_samples);
+ OPT(opt_ld_lz);
OPT(register_coalesce);
OPT(compute_to_mrf);
OPT(eliminate_find_live_channel);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index ba6bd3f..66b39dc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -227,6 +227,7 @@ public:
bool opt_saturate_propagation();
bool opt_cmod_propagation();
bool opt_zero_samples();
+ bool opt_ld_lz();
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
uint32_t spill_offset, int count);
void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8654ca4..0516d28 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -807,6 +807,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
+ case SHADER_OPCODE_TXF_LZ:
+ assert(devinfo->gen >= 9);
+ msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ;
+ break;
case SHADER_OPCODE_TXF_CMS_W:
assert(devinfo->gen >= 9);
msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
@@ -2115,6 +2119,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXF_LZ:
case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 8d92584..557811c 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -215,6 +215,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXF_LZ:
case SHADER_OPCODE_TXL:
/* 18 cycles:
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 068244b..69f62d9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -213,6 +213,8 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op)
return "txf";
case SHADER_OPCODE_TXF_LOGICAL:
return "txf_logical";
+ case SHADER_OPCODE_TXF_LZ:
+ return "txf_lz";
case SHADER_OPCODE_TXL:
return "txl";
case SHADER_OPCODE_TXL_LOGICAL:
@@ -749,6 +751,7 @@ backend_instruction::is_tex() const
opcode == FS_OPCODE_TXB ||
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
+ opcode == SHADER_OPCODE_TXF_LZ ||
opcode == SHADER_OPCODE_TXF_CMS ||
opcode == SHADER_OPCODE_TXF_CMS_W ||
opcode == SHADER_OPCODE_TXF_UMS ||
--
2.7.3
More information about the mesa-dev
mailing list