<div dir="ltr">On 3 October 2013 10:59, Matt Turner <span dir="ltr"><<a href="mailto:mattst88@gmail.com" target="_blank">mattst88@gmail.com</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div class="im">v2: Check fixed_hw_reg.{file,nr} instead of dst.reg.<br>
</div>v3: Store the bool emitted_addc_or_subb in the class, not static.<br>
---<br>
src/mesa/drivers/dri/i965/brw_fs.h | 3 +<br>
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 104 +++++++++++++++++++++++++++<br>
2 files changed, 107 insertions(+)<br></blockquote><div><br></div><div>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h<br>
index 6a53e59..c703c2b 100644<br>
<div class="im">--- a/src/mesa/drivers/dri/i965/brw_fs.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs.h<br>
@@ -345,6 +345,7 @@ public:<br>
fs_reg src0, fs_reg src1);<br>
bool try_emit_saturate(ir_expression *ir);<br>
bool try_emit_mad(ir_expression *ir, int mul_arg);<br>
+ void try_combine_add_with_addc_subb();<br>
void try_replace_with_sel();<br>
void emit_bool_to_cond_code(ir_rvalue *condition);<br>
void emit_if_gen6(ir_if *ir);<br>
</div>@@ -458,6 +459,8 @@ public:<br>
<br>
int force_uncompressed_stack;<br>
int force_sechalf_stack;<br>
+<br>
+ bool emitted_addc_or_subb;<br>
};<br>
<br>
/**<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
index b8c30e6..8accbd6 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
@@ -313,6 +313,102 @@ fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)<br>
<div><div class="h5"> return true;<br>
}<br>
<br>
+/**<br>
+ * The addition and carry in the uaddCarry() built-in function are implemented<br>
+ * separately as ir_binop_add and ir_binop_carry respectively. i965 generates<br>
+ * ADDC and a MOV from the accumulator for the carry.<br>
+ *<br>
+ * The generated code for uaddCarry(uint x, uint y, out uint carry) would look<br>
+ * like this:<br>
+ *<br>
+ * addc null, x, y<br>
+ * mov carry, acc0<br>
+ * add sum, x, y<br>
+ *<br>
+ * This peephole pass optimizes this into<br>
+ *<br>
+ * addc sum, x, y<br>
+ * mov carry, acc0<br>
+ *<br>
+ * usubBorrow() works in the same fashion.<br>
+ */<br>
+void<br>
+fs_visitor::try_combine_add_with_addc_subb()<br>
+{<br>
+ /* ADDC/SUBB was introduced in gen7. */<br>
+ if (brw->gen < 7)<br>
+ return;<br>
+<br>
+ fs_inst *add_inst = (fs_inst *) instructions.get_tail();<br>
+ assert(add_inst->opcode == BRW_OPCODE_ADD);<br>
+<br>
+ /* ADDC/SUBB only operates on UD. */<br>
+ if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||<br>
+ add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||<br>
+ add_inst->src[1].type != BRW_REGISTER_TYPE_UD)<br>
+ return;<br>
+<br>
+ bool found = false;<br>
+ fs_inst *match = (fs_inst *) add_inst->prev;<br>
+ /* The ADDC should appear within 8 instructions of ADD for a vec4. SUBB<br>
+ * should appear farther away because of the extra MOV negates.<br>
+ */<br>
+ for (int i = 0; i < 16; i++, match = (fs_inst *) match->prev) {<br>
+ if (match->is_head_sentinel())<br>
+ return;<br>
+<br>
+ /* Look for an ADDC/SUBB instruction whose destination is the null<br>
+ * register (ir_binop_carry emits ADDC with null destination; same for<br>
+ * ir_binop_borrow with SUBB) and whose sources are identical to those<br>
+ * of the ADD.<br>
+ */<br>
+ if (match->opcode != BRW_OPCODE_ADDC && match->opcode != BRW_OPCODE_SUBB)<br>
+ continue;<br>
+<br>
+ /* Only look for newly emitted ADDC/SUBB with null destination. */<br>
+ if (match->dst.file != HW_REG ||<br>
+ match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||<br>
+ match-><a href="http://dst.fixed_hw_reg.nr" target="_blank">dst.fixed_hw_reg.nr</a> != BRW_ARF_NULL)<br>
+ continue;<br>
+<br>
+ fs_reg *src0 = &add_inst->src[0];<br>
+ fs_reg *src1 = &add_inst->src[1];<br>
+<br>
+ /* For SUBB, the ADD's second source will contain a negate modifier<br>
+ * which at this point will be in the form of a<br>
+ *<br>
+ * MOV dst, -src<br>
+ *<br>
+ * instruction, so src[1].file will be GRF, even if it's a uniform push<br>
+ * constant.<br>
+ */<br>
+ if (match->src[1].reg != add_inst->src[1].reg) {<br>
+ /* The negating MOV should be immediately before the ADD. */<br>
+ fs_inst *mov_inst = (fs_inst *) add_inst->prev;<br>
+ if (mov_inst->opcode != BRW_OPCODE_MOV)<br>
+ continue;<br>
+<br>
+ src1 = &mov_inst->src[0];<br>
+ }<br>
+<br>
+ /* If everything matches, we're done. */<br>
+ if (match->src[0].file == src0->file &&<br>
+ match->src[1].file == src1->file &&<br>
+ match->src[0].reg == src0->reg &&<br>
+ match->src[1].reg == src1->reg &&<br>
+ match->src[0].reg_offset == src0->reg_offset &&<br>
+ match->src[1].reg_offset == src1->reg_offset) {<br>
+ found = true;<br>
+ break;<br>
+ }<br>
+ }<br>
+<br>
+ if (found) {<br>
+ match->dst = add_inst->dst;<br>
+ add_inst->remove();<br>
+ }<br>
+}<br>
+<br>
void<br>
fs_visitor::visit(ir_expression *ir)<br>
{<br>
</div></div>@@ -415,6 +511,8 @@ fs_visitor::visit(ir_expression *ir)<br>
<div class="im"><br>
case ir_binop_add:<br>
emit(ADD(this->result, op[0], op[1]));<br>
+ if (emitted_addc_or_subb)<br>
+ try_combine_add_with_addc_subb();<br>
break;<br>
case ir_binop_sub:<br>
assert(!"not reached: should be handled by ir_sub_to_add_neg");<br>
</div>@@ -451,6 +549,8 @@ fs_visitor::visit(ir_expression *ir)<br>
<div class="im"> if (brw->gen >= 7 && dispatch_width == 16)<br>
fail("16-wide explicit accumulator operands unsupported\n");<br>
<br>
+ emitted_addc_or_subb = true;<br>
+<br>
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);<br>
<br>
emit(ADDC(reg_null_ud, op[0], op[1]));<br>
</div>@@ -461,6 +561,8 @@ fs_visitor::visit(ir_expression *ir)<br>
<div class="im"> if (brw->gen >= 7 && dispatch_width == 16)<br>
fail("16-wide explicit accumulator operands unsupported\n");<br>
<br>
+ emitted_addc_or_subb = true;<br>
+<br>
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);<br>
<br>
emit(SUBB(reg_null_ud, op[0], op[1]));<br>
</div>@@ -2610,6 +2712,8 @@ fs_visitor::fs_visitor(struct brw_context *brw,<br>
this->force_uncompressed_stack = 0;<br>
this->force_sechalf_stack = 0;<br>
<br>
+ this->emitted_addc_or_subb = false;<br>
+<br>
memset(&this->param_size, 0, sizeof(this->param_size));<br>
<div class="HOEnZb"><div class="h5"> }<br>
<br>
--<br>
1.8.3.2<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</div></div></blockquote></div><br></div></div>