[Mesa-dev] [PATCH 2/2] i965: Do channel expressions on significantly fewer opcodes.

Thu Jan 21 16:37:21 PST 2016

nir_lower_alu_to_scalar() and nir_lower_load_const_to_scalar()
handle most cases quite well.  They also create nir_ssa_defs, which
are much less memory-intensive than ir_variables.

This can mean losing out on a few GLSL IR optimizations, however.
In most cases, this is fine.  But a few opcodes still benefit from
being scalarized in GLSL IR:

- add/mul/dot still benefit from opt_algebraic()'s constant
  reassociation capabilities.

- min/max still benefit from opt_minmax().

- comparisons seem to still benefit from opt_algebraic(), even
  though we also do most of them in nir_opt_algebraic_late().

With this change, shader-db statistics on Skylake are:

total instructions in shared programs: 9107924 -> 9107347 (-0.01%)
instructions in affected programs: 188830 -> 188253 (-0.31%)
helped: 572
HURT: 154

total cycles in shared programs: 69176332 -> 69129860 (-0.07%)
cycles in affected programs: 23460680 -> 23414208 (-0.20%)
helped: 8102
HURT: 7146

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 .../dri/i965/brw_fs_channel_expressions.cpp        | 221 +++------------------
 1 file changed, 30 insertions(+), 191 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 21f0b70..ed0f679 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -72,15 +72,21 @@ channel_expressions_predicate(ir_instruction *ir)
       return false;
 
    switch (expr->operation) {
-      /* these opcodes need to act on the whole vector,
-       * just like texturing.
-       */
-      case ir_unop_interpolate_at_centroid:
-      case ir_binop_interpolate_at_offset:
-      case ir_binop_interpolate_at_sample:
-         return false;
-      default:
+      case ir_binop_mul:
+      case ir_binop_add:
+      case ir_binop_dot:
+      case ir_binop_min:
+      case ir_binop_max:
+      case ir_binop_less:
+      case ir_binop_lequal:
+      case ir_binop_greater:
+      case ir_binop_gequal:
+      case ir_binop_equal:
+      case ir_binop_nequal:
          break;
+
+      default:
+         return false;
    }
 
    for (i = 0; i < expr->get_num_operands(); i++) {
@@ -162,13 +168,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
       return visit_continue;
 
    switch (expr->operation) {
-      case ir_unop_interpolate_at_centroid:
-      case ir_binop_interpolate_at_offset:
-      case ir_binop_interpolate_at_sample:
-         return visit_continue;
+      case ir_binop_mul:
+      case ir_binop_add:
+      case ir_binop_dot:
+      case ir_binop_min:
+      case ir_binop_max:
+      case ir_binop_less:
+      case ir_binop_lequal:
+      case ir_binop_greater:
+      case ir_binop_gequal:
+      case ir_binop_equal:
+      case ir_binop_nequal:
+         break;
 
       default:
-         break;
+         return visit_continue;
    }
 
    /* Store the expression operands in temps so we can use them
@@ -197,83 +211,13 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
 
    /* OK, time to break down this vector operation. */
    switch (expr->operation) {
-   case ir_unop_bit_not:
-   case ir_unop_logic_not:
-   case ir_unop_neg:
-   case ir_unop_abs:
-   case ir_unop_sign:
-   case ir_unop_rcp:
-   case ir_unop_rsq:
-   case ir_unop_sqrt:
-   case ir_unop_exp:
-   case ir_unop_log:
-   case ir_unop_exp2:
-   case ir_unop_log2:
-   case ir_unop_bitcast_i2f:
-   case ir_unop_bitcast_f2i:
-   case ir_unop_bitcast_f2u:
-   case ir_unop_bitcast_u2f:
-   case ir_unop_i2u:
-   case ir_unop_u2i:
-   case ir_unop_f2i:
-   case ir_unop_f2u:
-   case ir_unop_i2f:
-   case ir_unop_f2b:
-   case ir_unop_b2f:
-   case ir_unop_i2b:
-   case ir_unop_b2i:
-   case ir_unop_u2f:
-   case ir_unop_trunc:
-   case ir_unop_ceil:
-   case ir_unop_floor:
-   case ir_unop_fract:
-   case ir_unop_round_even:
-   case ir_unop_sin:
-   case ir_unop_cos:
-   case ir_unop_dFdx:
-   case ir_unop_dFdx_coarse:
-   case ir_unop_dFdx_fine:
-   case ir_unop_dFdy:
-   case ir_unop_dFdy_coarse:
-   case ir_unop_dFdy_fine:
-   case ir_unop_bitfield_reverse:
-   case ir_unop_bit_count:
-   case ir_unop_find_msb:
-   case ir_unop_find_lsb:
-   case ir_unop_saturate:
-   case ir_unop_subroutine_to_int:
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-
-	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
-						  element_type,
-						  op0,
-						  NULL));
-      }
-      break;
-
    case ir_binop_add:
-   case ir_binop_sub:
    case ir_binop_mul:
-   case ir_binop_imul_high:
-   case ir_binop_div:
-   case ir_binop_carry:
-   case ir_binop_borrow:
-   case ir_binop_mod:
    case ir_binop_min:
    case ir_binop_max:
-   case ir_binop_pow:
-   case ir_binop_lshift:
-   case ir_binop_rshift:
-   case ir_binop_bit_and:
-   case ir_binop_bit_xor:
-   case ir_binop_bit_or:
-   case ir_binop_logic_and:
-   case ir_binop_logic_xor:
-   case ir_binop_logic_or:
    case ir_binop_less:
-   case ir_binop_greater:
    case ir_binop_lequal:
+   case ir_binop_greater:
    case ir_binop_gequal:
    case ir_binop_equal:
    case ir_binop_nequal:
@@ -312,113 +256,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
       break;
    }
 
-   case ir_binop_all_equal:
-   case ir_binop_any_nequal: {
-      ir_expression *last = NULL;
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-	 ir_rvalue *op1 = get_element(op_var[1], i);
-	 ir_expression *temp;
-	 ir_expression_operation join;
-
-	 if (expr->operation == ir_binop_all_equal)
-	    join = ir_binop_logic_and;
-	 else
-	    join = ir_binop_logic_or;
-
-	 temp = new(mem_ctx) ir_expression(expr->operation,
-					   element_type,
-					   op0,
-					   op1);
-	 if (last) {
-	    last = new(mem_ctx) ir_expression(join,
-					      element_type,
-					      temp,
-					      last);
-	 } else {
-	    last = temp;
-	 }
-      }
-      assign(ir, 0, last);
-      break;
-   }
-   case ir_unop_noise:
-      unreachable("noise should have been broken down to function call");
-
-   case ir_binop_ubo_load:
-   case ir_unop_get_buffer_size:
-      unreachable("not yet supported");
-
-   case ir_triop_fma:
-   case ir_triop_lrp:
-   case ir_triop_csel:
-   case ir_triop_bitfield_extract:
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-	 ir_rvalue *op1 = get_element(op_var[1], i);
-	 ir_rvalue *op2 = get_element(op_var[2], i);
-
-	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
-						  element_type,
-						  op0,
-						  op1,
-						  op2));
-      }
-      break;
-
-   case ir_quadop_bitfield_insert:
-      for (i = 0; i < vector_elements; i++) {
-         ir_rvalue *op0 = get_element(op_var[0], i);
-         ir_rvalue *op1 = get_element(op_var[1], i);
-         ir_rvalue *op2 = get_element(op_var[2], i);
-         ir_rvalue *op3 = get_element(op_var[3], i);
-
-         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
-                                                  element_type,
-                                                  op0,
-                                                  op1,
-                                                  op2,
-                                                  op3));
-      }
-      break;
-
-   case ir_unop_pack_snorm_2x16:
-   case ir_unop_pack_snorm_4x8:
-   case ir_unop_pack_unorm_2x16:
-   case ir_unop_pack_unorm_4x8:
-   case ir_unop_pack_half_2x16:
-   case ir_unop_unpack_snorm_2x16:
-   case ir_unop_unpack_snorm_4x8:
-   case ir_unop_unpack_unorm_2x16:
-   case ir_unop_unpack_unorm_4x8:
-   case ir_unop_unpack_half_2x16:
-   case ir_binop_ldexp:
-   case ir_binop_vector_extract:
-   case ir_triop_vector_insert:
-   case ir_quadop_vector:
-   case ir_unop_ssbo_unsized_array_length:
-      unreachable("should have been lowered");
-
-   case ir_unop_unpack_half_2x16_split_x:
-   case ir_unop_unpack_half_2x16_split_y:
-   case ir_binop_pack_half_2x16_split:
-   case ir_unop_interpolate_at_centroid:
-   case ir_binop_interpolate_at_offset:
-   case ir_binop_interpolate_at_sample:
-      unreachable("not reached: expression operates on scalars only");
-
-   case ir_unop_pack_double_2x32:
-   case ir_unop_unpack_double_2x32:
-   case ir_unop_frexp_sig:
-   case ir_unop_frexp_exp:
-   case ir_unop_d2f:
-   case ir_unop_f2d:
-   case ir_unop_d2i:
-   case ir_unop_i2d:
-   case ir_unop_d2u:
-   case ir_unop_u2d:
-   case ir_unop_d2b:
-      unreachable("no fp64 support yet");
+   default:
+      unreachable("should have been skipped above");
    }
 
    ir->remove();
-- 
2.7.0