[Mesa-dev] [PATCH 2/2] i965: Do channel expressions on significantly fewer opcodes.
Kenneth Graunke
kenneth at whitecape.org
Thu Jan 21 16:37:21 PST 2016
nir_lower_alu_to_scalar() and nir_lower_load_const_to_scalar()
handle most cases quite well. They also create nir_ssa_defs, which are
much less memory intensive than ir_variables.
Skipping channel expressions can mean losing out on a few GLSL IR
optimizations, however. In most cases, this is fine. But a few opcodes
still benefit from being scalarized in GLSL IR:
- add/mul/dot still benefit from opt_algebraic()'s constant
reassociation capabilities.
- min/max still benefit from opt_minmax().
- comparisons seem to still benefit from opt_algebraic(), even
though we also do most of them in nir_opt_algebraic_late().
With this change, shader-db statistics on Skylake are:
total instructions in shared programs: 9107924 -> 9107347 (-0.01%)
instructions in affected programs: 188830 -> 188253 (-0.31%)
helped: 572
HURT: 154
total cycles in shared programs: 69176332 -> 69129860 (-0.07%)
cycles in affected programs: 23460680 -> 23414208 (-0.20%)
helped: 8102
HURT: 7146
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
.../dri/i965/brw_fs_channel_expressions.cpp | 221 +++------------------
1 file changed, 30 insertions(+), 191 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 21f0b70..ed0f679 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -72,15 +72,21 @@ channel_expressions_predicate(ir_instruction *ir)
return false;
switch (expr->operation) {
- /* these opcodes need to act on the whole vector,
- * just like texturing.
- */
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_offset:
- case ir_binop_interpolate_at_sample:
- return false;
- default:
+ case ir_binop_mul:
+ case ir_binop_add:
+ case ir_binop_dot:
+ case ir_binop_min:
+ case ir_binop_max:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
break;
+
+ default:
+ return false;
}
for (i = 0; i < expr->get_num_operands(); i++) {
@@ -162,13 +168,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
return visit_continue;
switch (expr->operation) {
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_offset:
- case ir_binop_interpolate_at_sample:
- return visit_continue;
+ case ir_binop_mul:
+ case ir_binop_add:
+ case ir_binop_dot:
+ case ir_binop_min:
+ case ir_binop_max:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ break;
default:
- break;
+ return visit_continue;
}
/* Store the expression operands in temps so we can use them
@@ -197,83 +211,13 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
/* OK, time to break down this vector operation. */
switch (expr->operation) {
- case ir_unop_bit_not:
- case ir_unop_logic_not:
- case ir_unop_neg:
- case ir_unop_abs:
- case ir_unop_sign:
- case ir_unop_rcp:
- case ir_unop_rsq:
- case ir_unop_sqrt:
- case ir_unop_exp:
- case ir_unop_log:
- case ir_unop_exp2:
- case ir_unop_log2:
- case ir_unop_bitcast_i2f:
- case ir_unop_bitcast_f2i:
- case ir_unop_bitcast_f2u:
- case ir_unop_bitcast_u2f:
- case ir_unop_i2u:
- case ir_unop_u2i:
- case ir_unop_f2i:
- case ir_unop_f2u:
- case ir_unop_i2f:
- case ir_unop_f2b:
- case ir_unop_b2f:
- case ir_unop_i2b:
- case ir_unop_b2i:
- case ir_unop_u2f:
- case ir_unop_trunc:
- case ir_unop_ceil:
- case ir_unop_floor:
- case ir_unop_fract:
- case ir_unop_round_even:
- case ir_unop_sin:
- case ir_unop_cos:
- case ir_unop_dFdx:
- case ir_unop_dFdx_coarse:
- case ir_unop_dFdx_fine:
- case ir_unop_dFdy:
- case ir_unop_dFdy_coarse:
- case ir_unop_dFdy_fine:
- case ir_unop_bitfield_reverse:
- case ir_unop_bit_count:
- case ir_unop_find_msb:
- case ir_unop_find_lsb:
- case ir_unop_saturate:
- case ir_unop_subroutine_to_int:
- for (i = 0; i < vector_elements; i++) {
- ir_rvalue *op0 = get_element(op_var[0], i);
-
- assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
- element_type,
- op0,
- NULL));
- }
- break;
-
case ir_binop_add:
- case ir_binop_sub:
case ir_binop_mul:
- case ir_binop_imul_high:
- case ir_binop_div:
- case ir_binop_carry:
- case ir_binop_borrow:
- case ir_binop_mod:
case ir_binop_min:
case ir_binop_max:
- case ir_binop_pow:
- case ir_binop_lshift:
- case ir_binop_rshift:
- case ir_binop_bit_and:
- case ir_binop_bit_xor:
- case ir_binop_bit_or:
- case ir_binop_logic_and:
- case ir_binop_logic_xor:
- case ir_binop_logic_or:
case ir_binop_less:
- case ir_binop_greater:
case ir_binop_lequal:
+ case ir_binop_greater:
case ir_binop_gequal:
case ir_binop_equal:
case ir_binop_nequal:
@@ -312,113 +256,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
break;
}
- case ir_binop_all_equal:
- case ir_binop_any_nequal: {
- ir_expression *last = NULL;
- for (i = 0; i < vector_elements; i++) {
- ir_rvalue *op0 = get_element(op_var[0], i);
- ir_rvalue *op1 = get_element(op_var[1], i);
- ir_expression *temp;
- ir_expression_operation join;
-
- if (expr->operation == ir_binop_all_equal)
- join = ir_binop_logic_and;
- else
- join = ir_binop_logic_or;
-
- temp = new(mem_ctx) ir_expression(expr->operation,
- element_type,
- op0,
- op1);
- if (last) {
- last = new(mem_ctx) ir_expression(join,
- element_type,
- temp,
- last);
- } else {
- last = temp;
- }
- }
- assign(ir, 0, last);
- break;
- }
- case ir_unop_noise:
- unreachable("noise should have been broken down to function call");
-
- case ir_binop_ubo_load:
- case ir_unop_get_buffer_size:
- unreachable("not yet supported");
-
- case ir_triop_fma:
- case ir_triop_lrp:
- case ir_triop_csel:
- case ir_triop_bitfield_extract:
- for (i = 0; i < vector_elements; i++) {
- ir_rvalue *op0 = get_element(op_var[0], i);
- ir_rvalue *op1 = get_element(op_var[1], i);
- ir_rvalue *op2 = get_element(op_var[2], i);
-
- assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
- element_type,
- op0,
- op1,
- op2));
- }
- break;
-
- case ir_quadop_bitfield_insert:
- for (i = 0; i < vector_elements; i++) {
- ir_rvalue *op0 = get_element(op_var[0], i);
- ir_rvalue *op1 = get_element(op_var[1], i);
- ir_rvalue *op2 = get_element(op_var[2], i);
- ir_rvalue *op3 = get_element(op_var[3], i);
-
- assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
- element_type,
- op0,
- op1,
- op2,
- op3));
- }
- break;
-
- case ir_unop_pack_snorm_2x16:
- case ir_unop_pack_snorm_4x8:
- case ir_unop_pack_unorm_2x16:
- case ir_unop_pack_unorm_4x8:
- case ir_unop_pack_half_2x16:
- case ir_unop_unpack_snorm_2x16:
- case ir_unop_unpack_snorm_4x8:
- case ir_unop_unpack_unorm_2x16:
- case ir_unop_unpack_unorm_4x8:
- case ir_unop_unpack_half_2x16:
- case ir_binop_ldexp:
- case ir_binop_vector_extract:
- case ir_triop_vector_insert:
- case ir_quadop_vector:
- case ir_unop_ssbo_unsized_array_length:
- unreachable("should have been lowered");
-
- case ir_unop_unpack_half_2x16_split_x:
- case ir_unop_unpack_half_2x16_split_y:
- case ir_binop_pack_half_2x16_split:
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_offset:
- case ir_binop_interpolate_at_sample:
- unreachable("not reached: expression operates on scalars only");
-
- case ir_unop_pack_double_2x32:
- case ir_unop_unpack_double_2x32:
- case ir_unop_frexp_sig:
- case ir_unop_frexp_exp:
- case ir_unop_d2f:
- case ir_unop_f2d:
- case ir_unop_d2i:
- case ir_unop_i2d:
- case ir_unop_d2u:
- case ir_unop_u2d:
- case ir_unop_d2b:
- unreachable("no fp64 support yet");
+ default:
+ unreachable("should have been skipped above");
}
ir->remove();
--
2.7.0
More information about the mesa-dev
mailing list