[Mesa-dev] [PATCH 01/12] i965/fs: Manually generate the meta fast-clear shader
Jason Ekstrand
jason at jlekstrand.net
Fri Sep 19 13:10:19 PDT 2014
Previously, we were generating the fast-clear shader from GLSL. The
problem is that fast clears require that we use a replicated write rather
than a regular write instruction. To get this, we had a
complicated and somewhat fragile optimization pass that looked for places
where a replicated write could be used and switched to it. Since replicated writes
have a lot of restrictions, we only ever use them for fast-clear
operations.
This commit replaces the optimization pass with a function that just
generates the shader we want. This a) takes less code, b) is less fragile
than the optimization pass, and c) generates a more efficient shader.
Signed-off-by: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Kristian Høgsberg <krh at bitplanet.net>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 122 ++++++++++-------------------------
src/mesa/drivers/dri/i965/brw_fs.h | 3 +-
2 files changed, 34 insertions(+), 91 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index fa95c81..3fb1545 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2319,98 +2319,42 @@ fs_visitor::compute_to_mrf()
* instructions to FS_OPCODE_REP_FB_WRITE.
*/
void
-fs_visitor::try_rep_send()
+fs_visitor::emit_repclear_shader()
{
- int i, count;
- fs_inst *start = NULL;
- bblock_t *mov_block;
+ int base_mrf = 1;
+ int color_mrf = base_mrf + 2;
- /* From the Ivybridge PRM, Volume 4 Part 1, section 3.9.11.2
- * ("Message Descriptor - Render Target Write"):
- *
- * "SIMD16_REPDATA message must not be used in SIMD8 pixel-shaders."
- */
- if (dispatch_width != 16)
- return;
-
- /* The constant color write message can't handle anything but the 4 color
- * values. We could do MRT, but the loops below would need to understand
- * handling the header being enabled or disabled on different messages. It
- * also requires that the render target be tiled, which might not be the
- * case for some EGLImage paths or if we some day do rendering to PBOs.
- */
- if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH) ||
- payload.aa_dest_stencil_reg ||
- payload.dest_depth_reg ||
- dual_src_output.file != BAD_FILE)
- return;
-
- /* The optimization is implemented as one pass through the instruction
- * list. We keep track of the most recent block of MOVs into sequential
- * MRFs from single, sequential float registers (ie uniforms). Then when
- * we find an FB_WRITE opcode, we see if the payload registers match the
- * destination registers in our block of MOVs.
- */
- count = 0;
- foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
- if (count == 0) {
- start = inst;
- mov_block = block;
- }
- if (inst->opcode == BRW_OPCODE_MOV &&
- inst->dst.file == MRF &&
- inst->dst.reg == start->dst.reg + 2 * count &&
- inst->src[0].file == HW_REG &&
- inst->src[0].reg_offset == start->src[0].reg_offset + count) {
- if (count == 0) {
- start = inst;
- mov_block = block;
- }
- count++;
- }
-
- if (inst->opcode == FS_OPCODE_FB_WRITE &&
- count == 4 &&
- (inst->base_mrf == start->dst.reg ||
- (inst->base_mrf + 2 == start->dst.reg && inst->header_present))) {
- fs_inst *mov = MOV(start->dst, start->src[0]);
+ fs_inst *mov = emit(MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)));
+ mov->force_writemask_all = true;
+ mov->force_uncompressed = true;
- /* Make a MOV that moves the four floats into the replicated write
- * payload. Since we're running at the very end of code generation
- * we can use hw registers and generate the stride and offsets we
- * need for this MOV. We use the first of the eight registers
- * allocated for the SIMD16 payload for the four floats.
- */
- mov->dst.fixed_hw_reg =
- brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE,
- start->dst.reg, 0);
- mov->dst.file = HW_REG;
- mov->dst.type = mov->dst.fixed_hw_reg.type;
-
- mov->src[0].fixed_hw_reg =
- brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
- mov->src[0].file = HW_REG;
- mov->src[0].type = mov->src[0].fixed_hw_reg.type;
- mov->force_writemask_all = true;
- mov->dst.type = BRW_REGISTER_TYPE_F;
-
- /* Replace the four MOVs with the new vec4 MOV. */
- start->insert_before(mov_block, mov);
- for (i = 0; i < 4; i++)
- ((fs_inst *) mov->next)->remove(mov_block);
-
- /* Finally, adjust the message length and set the opcode to
- * REP_FB_WRITE for the send, so that the generator will use the
- * replicated data mesage type. Then reset count so we'll start
- * looking for a new block in case we're in a MRT shader.
- */
- inst->opcode = FS_OPCODE_REP_FB_WRITE;
- inst->mlen -= 7;
- count = 0;
+ fs_inst *write;
+ if (key->nr_color_regions == 1) {
+ write = emit(FS_OPCODE_REP_FB_WRITE);
+ write->saturate = key->clamp_fragment_color;
+ write->base_mrf = color_mrf;
+ write->target = 0;
+ write->header_present = false;
+ write->mlen = 1;
+ } else {
+ for (int i = 0; i < key->nr_color_regions; ++i) {
+ write = emit(FS_OPCODE_REP_FB_WRITE);
+ write->saturate = key->clamp_fragment_color;
+ write->base_mrf = base_mrf;
+ write->target = i;
+ write->header_present = true;
+ write->mlen = 3;
}
}
+ write->eot = true;
- return;
+ assign_constant_locations();
+ assign_curb_setup();
+
+ /* Now that we have the uniform assigned, go ahead and force it to a vec4. */
+ assert(mov->src[0].file == HW_REG);
+ mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
}
/**
@@ -3189,6 +3133,9 @@ fs_visitor::run()
if (0) {
emit_dummy_fs();
+ } else if (brw->use_rep_send && dispatch_width == 16) {
+ emit_repclear_shader();
+ allocated_without_spills = true;
} else {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_begin();
@@ -3361,9 +3308,6 @@ fs_visitor::run()
prog_data->base.total_scratch = brw_get_scratch_size(last_scratch);
}
- if (brw->use_rep_send)
- try_rep_send();
-
if (dispatch_width == 8)
prog_data->reg_blocks = brw_register_blocks(grf_used);
else
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d40a2e3..d5a96c8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -356,12 +356,11 @@ public:
void lower_uniform_pull_constant_loads();
bool lower_load_payload();
- void try_rep_send();
-
void push_force_uncompressed();
void pop_force_uncompressed();
void emit_dummy_fs();
+ void emit_repclear_shader();
fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
glsl_interp_qualifier interpolation_mode,
--
2.1.0
More information about the mesa-dev
mailing list