[Mesa-dev] [PATCH 6/8] i965: Use ~0 to represent true on Gen >= 6.
Matt Turner
mattst88 at gmail.com
Sun Aug 10 16:18:36 PDT 2014
total instructions in shared programs: 4292303 -> 4288650 (-0.09%)
instructions in affected programs: 299670 -> 296017 (-1.22%)
---
TODO: test SNB and confirm that the BSpec is right and the PRM is wrong.
src/mesa/drivers/dri/i965/brw_context.c | 27 +++++++++++-
src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +--
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 42 ++++++++++++------
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 61 +++++++++++++++++++-------
4 files changed, 102 insertions(+), 34 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 52f2557..38e83ff 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -457,7 +457,32 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.QuadsFollowProvokingVertexConvention = false;
ctx->Const.NativeIntegers = true;
- ctx->Const.UniformBooleanTrue = 1;
+
+ /* Regarding the CMP instruction, the Ivybridge PRM says:
+ *
+ * "For each enabled channel 0b or 1b is assigned to the appropriate flag
+ * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
+ * 0xFFFFFFFF) is assigned to dst."
+ *
+ * but PRMs for earlier generations say
+ *
+ * "In dword format, one GRF may store up to 8 results. When the register
+ * is used later as a vector of Booleans, as only LSB at each channel
+ * contains meaning [sic] data, software should make sure all higher bits
+ * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
+ *
+ * We select the representation of a true boolean uniform to match what the
+ * CMP instruction returns.
+ *
+ * The Sandybridge BSpec's description of the CMP instruction matches that
+ * of the Ivybridge PRM. (The description in the Sandybridge PRM seems
+ * to have not been updated from Ironlake). Its CMP instruction behaves like
+ * Ivybridge and newer.
+ */
+ if (brw->gen >= 6)
+ ctx->Const.UniformBooleanTrue = ~0;
+ else
+ ctx->Const.UniformBooleanTrue = 1;
/* From the gen4 PRM, volume 4 page 127:
*
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index bf95b57..f394ec9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1190,11 +1190,11 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
/* The frontfacing comes in as a bit in the thread payload. */
if (brw->gen >= 6) {
- emit(BRW_OPCODE_ASR, *reg,
+ emit(BRW_OPCODE_SHL, *reg,
fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
- fs_reg(15));
+ fs_reg(16));
emit(BRW_OPCODE_NOT, *reg, *reg);
- emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1));
+ emit(BRW_OPCODE_ASR, *reg, *reg, fs_reg(31));
} else {
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
/* bit 31 is "primitive is back face", so checking < (1 << 31) gives
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 869c1e3..c022dc3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -526,10 +526,11 @@ fs_visitor::visit(ir_expression *ir)
switch (ir->operation) {
case ir_unop_logic_not:
- /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
- * ones complement of the whole register, not just bit 0.
- */
- emit(XOR(this->result, op[0], fs_reg(1)));
+ if (ctx->Const.UniformBooleanTrue != 1) {
+ emit(NOT(this->result, op[0]));
+ } else {
+ emit(XOR(this->result, op[0], fs_reg(1)));
+ }
break;
case ir_unop_neg:
op[0].negate = !op[0].negate;
@@ -685,8 +686,10 @@ fs_visitor::visit(ir_expression *ir)
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
- resolve_bool_comparison(ir->operands[0], &op[0]);
- resolve_bool_comparison(ir->operands[1], &op[1]);
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+ resolve_bool_comparison(ir->operands[1], &op[1]);
+ }
emit(CMP(this->result, op[0], op[1],
brw_conditional_for_comparison(ir->operation)));
@@ -757,9 +760,16 @@ fs_visitor::visit(ir_expression *ir)
emit(AND(this->result, op[0], fs_reg(1)));
break;
case ir_unop_b2f:
- temp = fs_reg(this, glsl_type::int_type);
- emit(AND(temp, op[0], fs_reg(1)));
- emit(MOV(this->result, temp));
+ if (ctx->Const.UniformBooleanTrue != 1) {
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ this->result.type = BRW_REGISTER_TYPE_UD;
+ emit(AND(this->result, op[0], fs_reg(0x3f800000u)));
+ this->result.type = BRW_REGISTER_TYPE_F;
+ } else {
+ temp = fs_reg(this, glsl_type::int_type);
+ emit(AND(temp, op[0], fs_reg(1)));
+ emit(MOV(this->result, temp));
+ }
break;
case ir_unop_f2b:
@@ -2214,8 +2224,10 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ resolve_bool_comparison(expr->operands[0], &op[0]);
+ resolve_bool_comparison(expr->operands[1], &op[1]);
+ }
emit(CMP(reg_null_d, op[0], op[1],
brw_conditional_for_comparison(expr->operation)));
@@ -2276,8 +2288,10 @@ fs_visitor::emit_if_gen6(ir_if *ir)
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ resolve_bool_comparison(expr->operands[0], &op[0]);
+ resolve_bool_comparison(expr->operands[1], &op[1]);
+ }
emit(IF(op[0], op[1],
brw_conditional_for_comparison(expr->operation)));
@@ -3117,6 +3131,8 @@ fs_visitor::resolve_ud_negate(fs_reg *reg)
void
fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
{
+ assert(ctx->Const.UniformBooleanTrue == 1);
+
if (rvalue->type != glsl_type::bool_type)
return;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c0b3b74..1e6c382 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1279,10 +1279,11 @@ vec4_visitor::visit(ir_expression *ir)
switch (ir->operation) {
case ir_unop_logic_not:
- /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
- * ones complement of the whole register, not just bit 0.
- */
- emit(XOR(result_dst, op[0], src_reg(1)));
+ if (ctx->Const.UniformBooleanTrue != 1) {
+ emit(NOT(result_dst, op[0]));
+ } else {
+ emit(XOR(result_dst, op[0], src_reg(1)));
+ }
break;
case ir_unop_neg:
op[0].negate = !op[0].negate;
@@ -1462,7 +1463,9 @@ vec4_visitor::visit(ir_expression *ir)
case ir_binop_nequal: {
emit(CMP(result_dst, op[0], op[1],
brw_conditional_for_comparison(ir->operation)));
- emit(AND(result_dst, result_src, src_reg(0x1)));
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ emit(AND(result_dst, result_src, src_reg(1)));
+ }
break;
}
@@ -1472,11 +1475,13 @@ vec4_visitor::visit(ir_expression *ir)
ir->operands[1]->type->is_vector()) {
emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
emit(MOV(result_dst, src_reg(0)));
- inst = emit(MOV(result_dst, src_reg(1)));
+ inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue)));
inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
} else {
emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
- emit(AND(result_dst, result_src, src_reg(0x1)));
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ emit(AND(result_dst, result_src, src_reg(1)));
+ }
}
break;
case ir_binop_any_nequal:
@@ -1486,11 +1491,13 @@ vec4_visitor::visit(ir_expression *ir)
emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
emit(MOV(result_dst, src_reg(0)));
- inst = emit(MOV(result_dst, src_reg(1)));
+ inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
} else {
emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
- emit(AND(result_dst, result_src, src_reg(0x1)));
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ emit(AND(result_dst, result_src, src_reg(1)));
+ }
}
break;
@@ -1498,7 +1505,7 @@ vec4_visitor::visit(ir_expression *ir)
emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
emit(MOV(result_dst, src_reg(0)));
- inst = emit(MOV(result_dst, src_reg(1)));
+ inst = emit(MOV(result_dst, src_reg(ctx->Const.UniformBooleanTrue)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
break;
@@ -1547,18 +1554,34 @@ vec4_visitor::visit(ir_expression *ir)
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_u2f:
- case ir_unop_b2f:
- case ir_unop_b2i:
case ir_unop_f2i:
case ir_unop_f2u:
emit(MOV(result_dst, op[0]));
break;
+ case ir_unop_b2i:
+ if (ctx->Const.UniformBooleanTrue != 1) {
+ emit(AND(result_dst, op[0], src_reg(1)));
+ } else {
+ emit(MOV(result_dst, op[0]));
+ }
+ break;
+ case ir_unop_b2f:
+ if (ctx->Const.UniformBooleanTrue != 1) {
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ result_dst.type = BRW_REGISTER_TYPE_UD;
+ emit(AND(result_dst, op[0], src_reg(0x3f800000u)));
+ result_dst.type = BRW_REGISTER_TYPE_F;
+ } else {
+ emit(MOV(result_dst, op[0]));
+ }
+ break;
case ir_unop_f2b:
- case ir_unop_i2b: {
+ case ir_unop_i2b:
emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
- emit(AND(result_dst, result_src, src_reg(1)));
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ emit(AND(result_dst, result_src, src_reg(1)));
+ }
break;
- }
case ir_unop_trunc:
emit(RNDZ(result_dst, op[0]));
@@ -1674,11 +1697,15 @@ vec4_visitor::visit(ir_expression *ir)
const_offset % 16 / 4,
const_offset % 16 / 4);
- /* UBO bools are any nonzero int. We store bools as either 0 or 1. */
+ /* UBO bools are any nonzero int. We need to convert them to use the
+ * value of true stored in ctx->Const.UniformBooleanTrue.
+ */
if (ir->type->base_type == GLSL_TYPE_BOOL) {
emit(CMP(result_dst, packed_consts, src_reg(0u),
BRW_CONDITIONAL_NZ));
- emit(AND(result_dst, result, src_reg(0x1)));
+ if (ctx->Const.UniformBooleanTrue == 1) {
+ emit(AND(result_dst, result, src_reg(1)));
+ }
} else {
emit(MOV(result_dst, packed_consts));
}
--
1.8.5.5
More information about the mesa-dev
mailing list