[Mesa-dev] [PATCH] i965/fs: Remove try_replace_with_sel().

Matt Turner mattst88 at gmail.com
Tue Nov 11 09:41:14 PST 2014


The rest of our backend optimizations have replaced the need for this
since it was written.

instructions in affected programs:     30626 -> 30564 (-0.20%)

Hurts a small number of CSGO shaders by one instruction, but helps even
more. Hurts two by a larger number because of something I noticed when I
first wrote the SEL peephole: try_replace_with_sel() operates on
instructions before we've demoted uniforms to pull constants. So code
like

   var.x = ( -abs(r6.w) >= 0.0 ) ? pc[82].x : r9.x;
   var.y = ( -abs(r6.w) >= 0.0 ) ? pc[82].y : r9.y;
   var.z = ( -abs(r6.w) >= 0.0 ) ? pc[82].z : r9.z;
   var.w = ( -abs(r6.w) >= 0.0 ) ? pc[82].w : r9.w;

where pc[82] gets demoted to a pull constant, we end up emitting a
send(4) instruction to load pc[82] each time, and since they're in
different basic blocks because we mishandle the ternary operator in this
case we can't combine them. Once we handle this common ternary pattern
better the problem will go away.
---
 src/mesa/drivers/dri/i965/brw_fs.h           |  1 -
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 87 ----------------------------
 2 files changed, 88 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d9150c3..1c14d13 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -510,7 +510,6 @@ public:
                     const fs_reg &src0, const fs_reg &src1);
    bool try_emit_saturate(ir_expression *ir);
    bool try_emit_mad(ir_expression *ir);
-   void try_replace_with_sel();
    bool opt_peephole_sel();
    bool opt_peephole_predicated_break();
    bool opt_saturate_propagation();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 4e1badd..d4f08aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2565,91 +2565,6 @@ fs_visitor::emit_if_gen6(ir_if *ir)
    emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ));
 }
 
-/**
- * Try to replace IF/MOV/ELSE/MOV/ENDIF with SEL.
- *
- * Many GLSL shaders contain the following pattern:
- *
- *    x = condition ? foo : bar
- *
- * The compiler emits an ir_if tree for this, since each subexpression might be
- * a complex tree that could have side-effects or short-circuit logic.
- *
- * However, the common case is to simply select one of two constants or
- * variable values---which is exactly what SEL is for.  In this case, the
- * assembly looks like:
- *
- *    (+f0) IF
- *    MOV dst src0
- *    ELSE
- *    MOV dst src1
- *    ENDIF
- *
- * which can be easily translated into:
- *
- *    (+f0) SEL dst src0 src1
- *
- * If src0 is an immediate value, we promote it to a temporary GRF.
- */
-void
-fs_visitor::try_replace_with_sel()
-{
-   fs_inst *endif_inst = (fs_inst *) instructions.get_tail();
-   assert(endif_inst->opcode == BRW_OPCODE_ENDIF);
-
-   /* Pattern match in reverse: IF, MOV, ELSE, MOV, ENDIF. */
-   int opcodes[] = {
-      BRW_OPCODE_IF, BRW_OPCODE_MOV, BRW_OPCODE_ELSE, BRW_OPCODE_MOV,
-   };
-
-   fs_inst *match = (fs_inst *) endif_inst->prev;
-   for (int i = 0; i < 4; i++) {
-      if (match->is_head_sentinel() || match->opcode != opcodes[4-i-1])
-         return;
-      match = (fs_inst *) match->prev;
-   }
-
-   /* The opcodes match; it looks like the right sequence of instructions. */
-   fs_inst *else_mov = (fs_inst *) endif_inst->prev;
-   fs_inst *then_mov = (fs_inst *) else_mov->prev->prev;
-   fs_inst *if_inst = (fs_inst *) then_mov->prev;
-
-   /* Check that the MOVs are the right form. */
-   if (then_mov->dst.equals(else_mov->dst) &&
-       !then_mov->is_partial_write() &&
-       !else_mov->is_partial_write()) {
-
-      /* Remove the matched instructions; we'll emit a SEL to replace them. */
-      while (!if_inst->next->is_tail_sentinel())
-         if_inst->next->exec_node::remove();
-      if_inst->exec_node::remove();
-
-      /* Only the last source register can be a constant, so if the MOV in
-       * the "then" clause uses a constant, we need to put it in a temporary.
-       */
-      fs_reg src0(then_mov->src[0]);
-      if (src0.file == IMM) {
-         src0 = fs_reg(this, glsl_type::float_type);
-         src0.type = then_mov->src[0].type;
-         emit(MOV(src0, then_mov->src[0]));
-      }
-
-      fs_inst *sel;
-      if (if_inst->conditional_mod) {
-         /* Sandybridge-specific IF with embedded comparison */
-         emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
-                  if_inst->conditional_mod));
-         sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
-         sel->predicate = BRW_PREDICATE_NORMAL;
-      } else {
-         /* Separate CMP and IF instructions */
-         sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
-         sel->predicate = if_inst->predicate;
-         sel->predicate_inverse = if_inst->predicate_inverse;
-      }
-   }
-}
-
 void
 fs_visitor::visit(ir_if *ir)
 {
@@ -2685,8 +2600,6 @@ fs_visitor::visit(ir_if *ir)
    }
 
    emit(BRW_OPCODE_ENDIF);
-
-   try_replace_with_sel();
 }
 
 void
-- 
2.0.4



More information about the mesa-dev mailing list