[Mesa-dev] [PATCH 2/5] i965/fs: Emit better b2f of an expression on GEN4 and GEN5
Ian Romanick
idr at freedesktop.org
Wed Mar 11 13:44:13 PDT 2015
From: Ian Romanick <ian.d.romanick at intel.com>
On platforms that do not natively generate 0u and ~0u for Boolean
results, b2f expressions that look like
f = b2f(expr cmp 0)
will generate better code by pretending the expression is
f = ir_triop_csel(0.0, 1.0, expr cmp 0)
This is because the last instruction of "expr" can generate the
condition code for the "cmp 0". This avoids having to do the "-(b & 1)"
trick to generate 0u or ~0u for the Boolean result. This means code like
mov(16) g16<1>F 1F
mul.ge.f0(16) null g6<8,8,1>F g14<8,8,1>F
(+f0) sel(16) m6<1>F g16<8,8,1>F 0F
will be generated instead of
mul(16) g2<1>F g12<8,8,1>F g4<8,8,1>F
cmp.ge.f0(16) g2<1>D g4<8,8,1>F 0F
and(16) g4<1>D g2<8,8,1>D 1D
and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD
v2: When the comparison is either == 0.0 or != 0.0, use the knowledge
that the true (or false) case already results in zero to allow better
code generation by possibly avoiding a load-immediate instruction.
v3: Apply the optimization even when neither operand of the comparison is zero.
Shader-db results:
GM45 (0x2A42):
total instructions in shared programs: 3551002 -> 3550829 (-0.00%)
instructions in affected programs: 33269 -> 33096 (-0.52%)
helped: 121
Iron Lake (0x0046):
total instructions in shared programs: 4993327 -> 4993146 (-0.00%)
instructions in affected programs: 34199 -> 34018 (-0.53%)
helped: 129
No change on other platforms.
Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
Cc: Tapani Palli <tapani.palli at intel.com>
---
src/mesa/drivers/dri/i965/brw_fs.h | 2 +
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 101 +++++++++++++++++++++++++--
2 files changed, 99 insertions(+), 4 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d9d5858..075e90c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -307,6 +307,7 @@ public:
const fs_reg &a);
void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1);
+ bool try_emit_b2f_of_comparison(ir_expression *ir);
bool try_emit_saturate(ir_expression *ir);
bool try_emit_line(ir_expression *ir);
bool try_emit_mad(ir_expression *ir);
@@ -317,6 +318,7 @@ public:
bool opt_saturate_propagation();
bool opt_cmod_propagation();
void emit_bool_to_cond_code(ir_rvalue *condition);
+ void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]);
void emit_if_gen6(ir_if *ir);
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
uint32_t spill_offset, int count);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 3025a9d..3d79796 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -475,6 +475,87 @@ fs_visitor::try_emit_mad(ir_expression *ir)
return true;
}
+bool
+fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir)
+{
+   /* Emit b2f(cmp) as a predicated SEL.  Returns false without emitting
+    * anything if ir's operand is not an expression, true otherwise.
+    *
+    * On platforms that do not natively generate 0u and ~0u for Boolean
+    * results, b2f expressions that look like
+    *
+    *    f = b2f(expr cmp 0)
+    *
+    * will generate better code by pretending the expression is
+    *
+    *    f = ir_triop_csel(0.0, 1.0, expr cmp 0)
+    *
+    * because the last instruction of "expr" can generate the condition code
+    * for the "cmp 0", which avoids the "-(b & 1)" trick.  So code like
+    *    mov(16) g16<1>F 1F
+    *    mul.ge.f0(16) null g6<8,8,1>F g14<8,8,1>F
+    *    (+f0) sel(16) m6<1>F g16<8,8,1>F 0F
+    *
+    * is generated instead of
+    *    mul(16) g2<1>F g12<8,8,1>F g4<8,8,1>F
+    *    cmp.ge.f0(16) g2<1>D g4<8,8,1>F 0F
+    *    and(16) g4<1>D g2<8,8,1>D 1D
+    *    and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD
+    *
+    * For "expr != 0" the false case already leaves zero in expr, so expr is
+    * selected directly, possibly avoiding a load-immediate.  "expr == 0" gets
+    * no such shortcut: its false case must yield 0.0 while expr is nonzero.
+    */
+   ir_expression *cmp = ir->operands[0]->as_expression();
+   if (cmp == NULL)
+      return false;
+
+   if (cmp->operation == ir_binop_nequal) {
+      for (unsigned i = 0; i < 2; i++) {
+         ir_constant *c = cmp->operands[i]->as_constant();
+         if (c == NULL || !c->is_zero())
+            continue;
+
+         ir_expression *expr = cmp->operands[i ^ 1]->as_expression();
+         if (expr != NULL) {
+            fs_reg op[2];
+
+            for (unsigned j = 0; j < 2; j++) {
+               cmp->operands[j]->accept(this);
+               op[j] = this->result;
+
+               resolve_ud_negate(&op[j]);
+            }
+
+            emit_bool_to_cond_code_of_reg(cmp, op);
+
+            /* cmp is "x != 0", so whenever cmp is false op[i ^ 1] already
+             * contains zero.  Invert the predicate so the false case selects
+             * src0 (op[i ^ 1]) and the true case selects immediate 1.0f.
+             */
+            this->result = vgrf(ir->type);
+            op[i ^ 1].type = BRW_REGISTER_TYPE_F;
+
+            fs_inst *inst = emit(SEL(this->result, op[i ^ 1], fs_reg(1.0f)));
+            inst->predicate = BRW_PREDICATE_NORMAL;
+            inst->predicate_inverse = true;
+            return true;
+         }
+      }
+   }
+
+   emit_bool_to_cond_code(cmp);
+
+   fs_reg temp = vgrf(ir->type);
+   emit(MOV(temp, fs_reg(1.0f)));
+
+   this->result = vgrf(ir->type);
+   fs_inst *inst = emit(SEL(this->result, temp, fs_reg(0.0f)));
+   inst->predicate = BRW_PREDICATE_NORMAL;
+
+   return true;
+}
+
static int
pack_pixel_offset(float x)
{
@@ -639,6 +720,11 @@ fs_visitor::visit(ir_expression *ir)
inst->predicate = BRW_PREDICATE_NORMAL;
return;
+ case ir_unop_b2f:
+ if (brw->gen <= 5 && try_emit_b2f_of_comparison(ir))
+ return;
+ break;
+
case ir_unop_interpolate_at_centroid:
case ir_binop_interpolate_at_offset:
case ir_binop_interpolate_at_sample:
@@ -2525,7 +2611,6 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
}
fs_reg op[3];
- fs_inst *inst;
assert(expr->get_num_operands() <= 3);
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
@@ -2537,6 +2622,14 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
resolve_ud_negate(&op[i]);
}
+ emit_bool_to_cond_code_of_reg(expr, op);
+}
+
+void
+fs_visitor::emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3])
+{
+ fs_inst *inst;
+
switch (expr->operation) {
case ir_unop_logic_not:
inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
@@ -2545,7 +2638,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
case ir_binop_logic_xor:
if (brw->gen <= 5) {
- fs_reg temp = vgrf(ir->type);
+ fs_reg temp = vgrf(expr->type);
emit(XOR(temp, op[0], op[1]));
inst = emit(AND(reg_null_d, temp, fs_reg(1)));
} else {
@@ -2556,7 +2649,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
case ir_binop_logic_or:
if (brw->gen <= 5) {
- fs_reg temp = vgrf(ir->type);
+ fs_reg temp = vgrf(expr->type);
emit(OR(temp, op[0], op[1]));
inst = emit(AND(reg_null_d, temp, fs_reg(1)));
} else {
@@ -2567,7 +2660,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
case ir_binop_logic_and:
if (brw->gen <= 5) {
- fs_reg temp = vgrf(ir->type);
+ fs_reg temp = vgrf(expr->type);
emit(AND(temp, op[0], op[1]));
inst = emit(AND(reg_null_d, temp, fs_reg(1)));
} else {
--
2.1.0
More information about the mesa-dev
mailing list