<div dir="ltr"><div>This series is<br></div>Reviewed-by: Jason Ekstrand &lt;<a href="mailto:jason.ekstrand@intel.com">jason.ekstrand@intel.com</a>&gt;<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Dec 4, 2014 at 3:05 PM, Matt Turner <span dir="ltr">&lt;<a href="mailto:mattst88@gmail.com" target="_blank">mattst88@gmail.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Jason realized that we could fix the result of the CMP instruction on<br>
Gen <= 5 by doing -(result & 1). Also do the resolves in the vec4<br>
backend before use, rather than when the bool was created. The FS does<br>
this and it saves some unnecessary resolves.<br>
<br>
On Ironlake:<br>
<br>
total instructions in shared programs: 4289762 -> 4287277 (-0.06%)<br>
instructions in affected programs: 619430 -> 616945 (-0.40%)<br>
---<br>
src/mesa/drivers/dri/i965/brw_context.c | 14 +--<br>
src/mesa/drivers/dri/i965/brw_fs.cpp | 7 +-<br>
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 70 +++++++--------<br>
src/mesa/drivers/dri/i965/brw_vec4.h | 1 +<br>
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 118 +++++++++++++++----------<br>
5 files changed, 108 insertions(+), 102 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c<br>
index 5830b6e..ee9684b 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_context.c<br>
+++ b/src/mesa/drivers/dri/i965/brw_context.c<br>
@@ -516,18 +516,10 @@ brw_initialize_context_constants(struct brw_context *brw)<br>
* contains meaning [sic] data, software should make sure all higher bits<br>
* are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."<br>
*<br>
- * We select the representation of a true boolean uniform to match what the<br>
- * CMP instruction returns.<br>
- *<br>
- * The Sandybridge BSpec's description of the CMP instruction matches that<br>
- * of the Ivybridge PRM. (The description in the Sandybridge PRM is seems<br>
- * to have not been updated from Ironlake). Its CMP instruction behaves like<br>
- * Ivybridge and newer.<br>
+ * We select the representation of a true boolean uniform to be ~0, and fix<br>
+ * the results of Gen <= 5 CMP instructions with -(result & 1).<br>
*/<br>
- if (brw->gen >= 6)<br>
- ctx->Const.UniformBooleanTrue = ~0;<br>
- else<br>
- ctx->Const.UniformBooleanTrue = 1;<br>
+ ctx->Const.UniformBooleanTrue = ~0;<br>
<br>
/* From the gen4 PRM, volume 4 page 127:<br>
*<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
index c6cd73b..37857e9 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp<br>
@@ -1399,15 +1399,12 @@ fs_visitor::emit_frontfacing_interpolation()<br>
* instruction only operates on UD (or D with an abs source modifier)<br>
* sources without negation.<br>
*<br>
- * Instead, use ASR (which will give ~0/true or 0/false) followed by an<br>
- * AND 1.<br>
+ * Instead, use ASR (which will give ~0/true or 0/false).<br>
*/<br>
- fs_reg asr = fs_reg(this, glsl_type::bool_type);<br>
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));<br>
g1_6.negate = true;<br>
<br>
- emit(ASR(asr, g1_6, fs_reg(31)));<br>
- emit(AND(*reg, asr, fs_reg(1)));<br>
+ emit(ASR(*reg, g1_6, fs_reg(31)));<br>
}<br>
<br>
return reg;<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
index e854056..e54d957 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
@@ -534,11 +534,7 @@ fs_visitor::visit(ir_expression *ir)<br>
<br>
switch (ir->operation) {<br>
case ir_unop_logic_not:<br>
- if (ctx->Const.UniformBooleanTrue != 1) {<br>
- emit(NOT(this->result, op[0]));<br>
- } else {<br>
- emit(XOR(this->result, op[0], fs_reg(1)));<br>
- }<br>
+ emit(NOT(this->result, op[0]));<br>
break;<br>
case ir_unop_neg:<br>
op[0].negate = !op[0].negate;<br>
@@ -744,7 +740,7 @@ fs_visitor::visit(ir_expression *ir)<br>
case ir_binop_all_equal:<br>
case ir_binop_nequal:<br>
case ir_binop_any_nequal:<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
+ if (brw->gen <= 5) {<br>
resolve_bool_comparison(ir->operands[0], &op[0]);<br>
resolve_bool_comparison(ir->operands[1], &op[1]);<br>
}<br>
@@ -818,16 +814,13 @@ fs_visitor::visit(ir_expression *ir)<br>
emit(AND(this->result, op[0], fs_reg(1)));<br>
break;<br>
case ir_unop_b2f:<br>
- if (ctx->Const.UniformBooleanTrue != 1) {<br>
- op[0].type = BRW_REGISTER_TYPE_D;<br>
- this->result.type = BRW_REGISTER_TYPE_D;<br>
- emit(AND(this->result, op[0], fs_reg(0x3f800000u)));<br>
- this->result.type = BRW_REGISTER_TYPE_F;<br>
- } else {<br>
- temp = fs_reg(this, glsl_type::int_type);<br>
- emit(AND(temp, op[0], fs_reg(1)));<br>
- emit(MOV(this->result, temp));<br>
+ if (brw->gen <= 5) {<br>
+ resolve_bool_comparison(ir->operands[0], &op[0]);<br>
}<br>
+ op[0].type = BRW_REGISTER_TYPE_D;<br>
+ this->result.type = BRW_REGISTER_TYPE_D;<br>
+ emit(AND(this->result, op[0], fs_reg(0x3f800000u)));<br>
+ this->result.type = BRW_REGISTER_TYPE_F;<br>
break;<br>
<br>
case ir_unop_f2b:<br>
@@ -2393,39 +2386,36 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)<br>
break;<br>
<br>
case ir_binop_logic_xor:<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- fs_reg dst = fs_reg(this, glsl_type::uint_type);<br>
- emit(XOR(dst, op[0], op[1]));<br>
- inst = emit(AND(reg_null_d, dst, fs_reg(1)));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
+ if (brw->gen <= 5) {<br>
+ fs_reg temp = fs_reg(this, ir->type);<br>
+ emit(XOR(temp, op[0], op[1]));<br>
+ inst = emit(AND(reg_null_d, temp, fs_reg(1)));<br>
} else {<br>
inst = emit(XOR(reg_null_d, op[0], op[1]));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
}<br>
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
break;<br>
<br>
case ir_binop_logic_or:<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- fs_reg dst = fs_reg(this, glsl_type::uint_type);<br>
- emit(OR(dst, op[0], op[1]));<br>
- inst = emit(AND(reg_null_d, dst, fs_reg(1)));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
+ if (brw->gen <= 5) {<br>
+ fs_reg temp = fs_reg(this, ir->type);<br>
+ emit(OR(temp, op[0], op[1]));<br>
+ inst = emit(AND(reg_null_d, temp, fs_reg(1)));<br>
} else {<br>
inst = emit(OR(reg_null_d, op[0], op[1]));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
}<br>
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
break;<br>
<br>
case ir_binop_logic_and:<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- fs_reg dst = fs_reg(this, glsl_type::uint_type);<br>
- emit(AND(dst, op[0], op[1]));<br>
- inst = emit(AND(reg_null_d, dst, fs_reg(1)));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
+ if (brw->gen <= 5) {<br>
+ fs_reg temp = fs_reg(this, ir->type);<br>
+ emit(AND(temp, op[0], op[1]));<br>
+ inst = emit(AND(reg_null_d, temp, fs_reg(1)));<br>
} else {<br>
inst = emit(AND(reg_null_d, op[0], op[1]));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
}<br>
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
break;<br>
<br>
case ir_unop_f2b:<br>
@@ -2454,7 +2444,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)<br>
case ir_binop_all_equal:<br>
case ir_binop_nequal:<br>
case ir_binop_any_nequal:<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
+ if (brw->gen <= 5) {<br>
resolve_bool_comparison(expr->operands[0], &op[0]);<br>
resolve_bool_comparison(expr->operands[1], &op[1]);<br>
}<br>
@@ -2544,7 +2534,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)<br>
case ir_binop_all_equal:<br>
case ir_binop_nequal:<br>
case ir_binop_any_nequal:<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
+ if (brw->gen <= 5) {<br>
resolve_bool_comparison(expr->operands[0], &op[0]);<br>
resolve_bool_comparison(expr->operands[1], &op[1]);<br>
}<br>
@@ -3414,14 +3404,16 @@ fs_visitor::resolve_ud_negate(fs_reg *reg)<br>
void<br>
fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)<br>
{<br>
- assert(ctx->Const.UniformBooleanTrue == 1);<br>
+ assert(brw->gen <= 5);<br>
<br>
if (rvalue->type != glsl_type::bool_type)<br>
return;<br>
<br>
- fs_reg temp = fs_reg(this, glsl_type::bool_type);<br>
- emit(AND(temp, *reg, fs_reg(1)));<br>
- *reg = temp;<br>
+ fs_reg and_result = fs_reg(this, glsl_type::bool_type);<br>
+ fs_reg neg_result = fs_reg(this, glsl_type::bool_type);<br>
+ emit(AND(and_result, *reg, fs_reg(1)));<br>
+ emit(MOV(neg_result, negate(and_result)));<br>
+ *reg = neg_result;<br>
}<br>
<br>
fs_visitor::fs_visitor(struct brw_context *brw,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
index d94c323..5270027 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h<br>
@@ -535,6 +535,7 @@ public:<br>
bool try_emit_mad(ir_expression *ir);<br>
bool try_emit_b2f_of_compare(ir_expression *ir);<br>
void resolve_ud_negate(src_reg *reg);<br>
+ void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);<br>
<br>
src_reg get_timestamp();<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp<br>
index 8a0a7e4..fe9f417 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp<br>
@@ -819,18 +819,36 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,<br>
break;<br>
<br>
case ir_binop_logic_xor:<br>
- inst = emit(XOR(dst_null_d(), op[0], op[1]));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
+ if (brw->gen <= 5) {<br>
+ src_reg temp = src_reg(this, ir->type);<br>
+ emit(XOR(dst_reg(temp), op[0], op[1]));<br>
+ inst = emit(AND(dst_null_d(), temp, src_reg(1)));<br>
+ } else {<br>
+ inst = emit(XOR(dst_null_d(), op[0], op[1]));<br>
+ }<br>
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
break;<br>
<br>
case ir_binop_logic_or:<br>
- inst = emit(OR(dst_null_d(), op[0], op[1]));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
+ if (brw->gen <= 5) {<br>
+ src_reg temp = src_reg(this, ir->type);<br>
+ emit(OR(dst_reg(temp), op[0], op[1]));<br>
+ inst = emit(AND(dst_null_d(), temp, src_reg(1)));<br>
+ } else {<br>
+ inst = emit(OR(dst_null_d(), op[0], op[1]));<br>
+ }<br>
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
break;<br>
<br>
case ir_binop_logic_and:<br>
- inst = emit(AND(dst_null_d(), op[0], op[1]));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
+ if (brw->gen <= 5) {<br>
+ src_reg temp = src_reg(this, ir->type);<br>
+ emit(AND(dst_reg(temp), op[0], op[1]));<br>
+ inst = emit(AND(dst_null_d(), temp, src_reg(1)));<br>
+ } else {<br>
+ inst = emit(AND(dst_null_d(), op[0], op[1]));<br>
+ }<br>
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
break;<br>
<br>
case ir_unop_f2b:<br>
@@ -852,16 +870,27 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,<br>
break;<br>
<br>
case ir_binop_all_equal:<br>
+ if (brw->gen <= 5) {<br>
+ resolve_bool_comparison(expr->operands[0], &op[0]);<br>
+ resolve_bool_comparison(expr->operands[1], &op[1]);<br>
+ }<br>
inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));<br>
*predicate = BRW_PREDICATE_ALIGN16_ALL4H;<br>
break;<br>
<br>
case ir_binop_any_nequal:<br>
+ if (brw->gen <= 5) {<br>
+ resolve_bool_comparison(expr->operands[0], &op[0]);<br>
+ resolve_bool_comparison(expr->operands[1], &op[1]);<br>
+ }<br>
inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));<br>
*predicate = BRW_PREDICATE_ALIGN16_ANY4H;<br>
break;<br>
<br>
case ir_unop_any:<br>
+ if (brw->gen <= 5) {<br>
+ resolve_bool_comparison(expr->operands[0], &op[0]);<br>
+ }<br>
inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));<br>
*predicate = BRW_PREDICATE_ALIGN16_ANY4H;<br>
break;<br>
@@ -872,6 +901,10 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,<br>
case ir_binop_lequal:<br>
case ir_binop_equal:<br>
case ir_binop_nequal:<br>
+ if (brw->gen <= 5) {<br>
+ resolve_bool_comparison(expr->operands[0], &op[0]);<br>
+ resolve_bool_comparison(expr->operands[1], &op[1]);<br>
+ }<br>
emit(CMP(dst_null_d(), op[0], op[1],<br>
brw_conditional_for_comparison(expr->operation)));<br>
break;<br>
@@ -902,14 +935,8 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,<br>
<br>
resolve_ud_negate(&this->result);<br>
<br>
- if (brw->gen >= 6) {<br>
- vec4_instruction *inst = emit(AND(dst_null_d(),<br>
- this->result, src_reg(1)));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
- } else {<br>
- vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));<br>
- inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
- }<br>
+ vec4_instruction *inst = emit(AND(dst_null_d(), this->result, src_reg(1)));<br>
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;<br>
}<br>
<br>
/**<br>
@@ -1320,11 +1347,7 @@ vec4_visitor::visit(ir_expression *ir)<br>
<br>
switch (ir->operation) {<br>
case ir_unop_logic_not:<br>
- if (ctx->Const.UniformBooleanTrue != 1) {<br>
- emit(NOT(result_dst, op[0]));<br>
- } else {<br>
- emit(XOR(result_dst, op[0], src_reg(1)));<br>
- }<br>
+ emit(NOT(result_dst, op[0]));<br>
break;<br>
case ir_unop_neg:<br>
op[0].negate = !op[0].negate;<br>
@@ -1510,11 +1533,12 @@ vec4_visitor::visit(ir_expression *ir)<br>
case ir_binop_gequal:<br>
case ir_binop_equal:<br>
case ir_binop_nequal: {<br>
+ if (brw->gen <= 5) {<br>
+ resolve_bool_comparison(ir->operands[0], &op[0]);<br>
+ resolve_bool_comparison(ir->operands[1], &op[1]);<br>
+ }<br>
emit(CMP(result_dst, op[0], op[1],<br>
brw_conditional_for_comparison(ir->operation)));<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- emit(AND(result_dst, result_src, src_reg(1)));<br>
- }<br>
break;<br>
}<br>
<br>
@@ -1528,9 +1552,6 @@ vec4_visitor::visit(ir_expression *ir)<br>
inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;<br>
} else {<br>
emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- emit(AND(result_dst, result_src, src_reg(1)));<br>
- }<br>
}<br>
break;<br>
case ir_binop_any_nequal:<br>
@@ -1544,9 +1565,6 @@ vec4_visitor::visit(ir_expression *ir)<br>
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;<br>
} else {<br>
emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- emit(AND(result_dst, result_src, src_reg(1)));<br>
- }<br>
}<br>
break;<br>
<br>
@@ -1608,28 +1626,22 @@ vec4_visitor::visit(ir_expression *ir)<br>
emit(MOV(result_dst, op[0]));<br>
break;<br>
case ir_unop_b2i:<br>
- if (ctx->Const.UniformBooleanTrue != 1) {<br>
- emit(AND(result_dst, op[0], src_reg(1)));<br>
- } else {<br>
- emit(MOV(result_dst, op[0]));<br>
- }<br>
+ emit(AND(result_dst, op[0], src_reg(1)));<br>
break;<br>
case ir_unop_b2f:<br>
- if (ctx->Const.UniformBooleanTrue != 1) {<br>
- op[0].type = BRW_REGISTER_TYPE_D;<br>
- result_dst.type = BRW_REGISTER_TYPE_D;<br>
- emit(AND(result_dst, op[0], src_reg(0x3f800000u)));<br>
- result_dst.type = BRW_REGISTER_TYPE_F;<br>
- } else {<br>
- emit(MOV(result_dst, op[0]));<br>
+ if (brw->gen <= 5) {<br>
+ resolve_bool_comparison(ir->operands[0], &op[0]);<br>
}<br>
+ op[0].type = BRW_REGISTER_TYPE_D;<br>
+ result_dst.type = BRW_REGISTER_TYPE_D;<br>
+ emit(AND(result_dst, op[0], src_reg(0x3f800000u)));<br>
+ result_dst.type = BRW_REGISTER_TYPE_F;<br>
break;<br>
case ir_unop_f2b:<br>
- case ir_unop_i2b:<br>
emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- emit(AND(result_dst, result_src, src_reg(1)));<br>
- }<br>
+ break;<br>
+ case ir_unop_i2b:<br>
+ emit(AND(result_dst, op[0], src_reg(1)));<br>
break;<br>
<br>
case ir_unop_trunc:<br>
@@ -1775,9 +1787,6 @@ vec4_visitor::visit(ir_expression *ir)<br>
if (ir->type->base_type == GLSL_TYPE_BOOL) {<br>
emit(CMP(result_dst, packed_consts, src_reg(0u),<br>
BRW_CONDITIONAL_NZ));<br>
- if (ctx->Const.UniformBooleanTrue == 1) {<br>
- emit(AND(result_dst, result, src_reg(1)));<br>
- }<br>
} else {<br>
emit(MOV(result_dst, packed_consts));<br>
}<br>
@@ -3533,6 +3542,21 @@ vec4_visitor::resolve_ud_negate(src_reg *reg)<br>
*reg = temp;<br>
}<br>
<br>
+void<br>
+vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)<br>
+{<br>
+ assert(brw->gen <= 5);<br>
+<br>
+ if (!rvalue->type->is_boolean())<br>
+ return;<br>
+<br>
+ src_reg and_result = src_reg(this, rvalue->type);<br>
+ src_reg neg_result = src_reg(this, rvalue->type);<br>
+ emit(AND(dst_reg(and_result), *reg, src_reg(1)));<br>
+ emit(MOV(dst_reg(neg_result), negate(and_result)));<br>
+ *reg = neg_result;<br>
+}<br>
+<br>
vec4_visitor::vec4_visitor(struct brw_context *brw,<br>
struct brw_vec4_compile *c,<br>
struct gl_program *prog,<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.0.4<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>