[Mesa-dev] [RFC] i965: Delete brw_do_channel_expressions().

Matt Turner mattst88 at gmail.com
Mon Jul 25 22:04:43 UTC 2016


On Haswell:

total instructions in shared programs: 6193528 -> 6197476 (0.06%)
instructions in affected programs: 756099 -> 760047 (0.52%)
helped: 1057
HURT: 2983

total cycles in shared programs: 71120700 -> 71169938 (0.07%)
cycles in affected programs: 53444478 -> 53493716 (0.09%)
helped: 12737
HURT: 20309

total loops in shared programs: 2493 -> 2493 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 2017 -> 1959 (-2.88%)
spills in affected programs: 1280 -> 1222 (-4.53%)
helped: 7
HURT: 2

total fills in shared programs: 1652 -> 1590 (-3.75%)
fills in affected programs: 1061 -> 999 (-5.84%)
helped: 7
HURT: 2

LOST:   46
GAINED: 26

On Skylake:

total instructions in shared programs: 11688933 -> 11697000 (0.07%)
instructions in affected programs: 1516104 -> 1524171 (0.53%)
helped: 1800
HURT: 5498

total cycles in shared programs: 133418304 -> 133453022 (0.03%)
cycles in affected programs: 105480208 -> 105514926 (0.03%)
helped: 20491
HURT: 36534

total loops in shared programs: 3218 -> 3218 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 3754 -> 3847 (2.48%)
spills in affected programs: 705 -> 798 (13.19%)
helped: 2
HURT: 61

total fills in shared programs: 5353 -> 5444 (1.70%)
fills in affected programs: 659 -> 750 (13.81%)
helped: 2
HURT: 61

LOST:   27
GAINED: 23

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94477
---
I'm not advocating for applying this. I think the results show
pretty clearly that we're not ready to get rid of channel expressions.

But how do we ready ourselves to finally delete it? Hopefully Tim's
work will help?


 src/mesa/drivers/dri/i965/Makefile.sources         |   1 -
 .../dri/i965/brw_fs_channel_expressions.cpp        | 447 ---------------------
 src/mesa/drivers/dri/i965/brw_link.cpp             |   3 -
 3 files changed, 451 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index df6b5dd..b5fe171 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -124,7 +124,6 @@ i965_FILES = \
 	brw_ff_gs.c \
 	brw_ff_gs_emit.c \
 	brw_ff_gs.h \
-	brw_fs_channel_expressions.cpp \
 	brw_fs_vector_splitting.cpp \
 	brw_formatquery.c \
 	brw_gs.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
deleted file mode 100644
index 5eac8d4..0000000
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file brw_wm_channel_expressions.cpp
- *
- * Breaks vector operations down into operations on each component.
- *
- * The 965 fragment shader receives 8 or 16 pixels at a time, so each
- * channel of a vector is laid out as 1 or 2 8-float registers.  Each
- * ALU operation operates on one of those channel registers.  As a
- * result, there is no value to the 965 fragment shader in tracking
- * "vector" expressions in the sense of GLSL fragment shaders, when
- * doing a channel at a time may help in constant folding, algebraic
- * simplification, and reducing the liveness of channel registers.
- *
- * The exception to the desire to break everything down to floats is
- * texturing.  The texture sampler returns a writemasked masked
- * 4/8-register sequence containing the texture values.  We don't want
- * to dispatch to the sampler separately for each channel we need, so
- * we do retain the vector types in that case.
- */
-
-#include "compiler/glsl/ir.h"
-#include "compiler/glsl/ir_expression_flattening.h"
-#include "compiler/glsl_types.h"
-
-class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
-public:
-   ir_channel_expressions_visitor()
-   {
-      this->progress = false;
-      this->mem_ctx = NULL;
-   }
-
-   ir_visitor_status visit_leave(ir_assignment *);
-
-   ir_rvalue *get_element(ir_variable *var, unsigned int element);
-   void assign(ir_assignment *ir, int elem, ir_rvalue *val);
-
-   bool progress;
-   void *mem_ctx;
-};
-
-static bool
-channel_expressions_predicate(ir_instruction *ir)
-{
-   ir_expression *expr = ir->as_expression();
-   unsigned int i;
-
-   if (!expr)
-      return false;
-
-   switch (expr->operation) {
-      case ir_unop_pack_half_2x16:
-      case ir_unop_pack_snorm_2x16:
-      case ir_unop_pack_snorm_4x8:
-      case ir_unop_pack_unorm_2x16:
-      case ir_unop_pack_unorm_4x8:
-         return false;
-
-      /* these opcodes need to act on the whole vector,
-       * just like texturing.
-       */
-      case ir_unop_interpolate_at_centroid:
-      case ir_binop_interpolate_at_offset:
-      case ir_binop_interpolate_at_sample:
-      case ir_unop_pack_double_2x32:
-         return false;
-      default:
-         break;
-   }
-
-   for (i = 0; i < expr->get_num_operands(); i++) {
-      if (expr->operands[i]->type->is_vector())
-	 return true;
-   }
-
-   return false;
-}
-
-bool
-brw_do_channel_expressions(exec_list *instructions)
-{
-   ir_channel_expressions_visitor v;
-
-   /* Pull out any matrix expression to a separate assignment to a
-    * temp.  This will make our handling of the breakdown to
-    * operations on the matrix's vector components much easier.
-    */
-   do_expression_flattening(instructions, channel_expressions_predicate);
-
-   visit_list_elements(&v, instructions);
-
-   return v.progress;
-}
-
-ir_rvalue *
-ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
-{
-   ir_dereference *deref;
-
-   if (var->type->is_scalar())
-      return new(mem_ctx) ir_dereference_variable(var);
-
-   assert(elem < var->type->components());
-   deref = new(mem_ctx) ir_dereference_variable(var);
-   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
-}
-
-void
-ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
-{
-   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
-   ir_assignment *assign;
-
-   /* This assign-of-expression should have been generated by the
-    * expression flattening visitor (since we never short circit to
-    * not flatten, even for plain assignments of variables), so the
-    * writemask is always full.
-    */
-   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
-
-   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
-   ir->insert_before(assign);
-}
-
-ir_visitor_status
-ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
-{
-   ir_expression *expr = ir->rhs->as_expression();
-   bool found_vector = false;
-   unsigned int i, vector_elements = 1;
-   ir_variable *op_var[4];
-
-   if (!expr)
-      return visit_continue;
-
-   if (!this->mem_ctx)
-      this->mem_ctx = ralloc_parent(ir);
-
-   for (i = 0; i < expr->get_num_operands(); i++) {
-      if (expr->operands[i]->type->is_vector()) {
-	 found_vector = true;
-	 vector_elements = expr->operands[i]->type->vector_elements;
-	 break;
-      }
-   }
-   if (!found_vector)
-      return visit_continue;
-
-   switch (expr->operation) {
-      case ir_unop_pack_half_2x16:
-      case ir_unop_pack_snorm_2x16:
-      case ir_unop_pack_snorm_4x8:
-      case ir_unop_pack_unorm_2x16:
-      case ir_unop_pack_unorm_4x8:
-      case ir_unop_interpolate_at_centroid:
-      case ir_binop_interpolate_at_offset:
-      case ir_binop_interpolate_at_sample:
-      /* We scalarize these in NIR, so no need to do it here */
-      case ir_unop_pack_double_2x32:
-         return visit_continue;
-
-      default:
-         break;
-   }
-
-   /* Store the expression operands in temps so we can use them
-    * multiple times.
-    */
-   for (i = 0; i < expr->get_num_operands(); i++) {
-      ir_assignment *assign;
-      ir_dereference *deref;
-
-      assert(!expr->operands[i]->type->is_matrix());
-
-      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
-					   "channel_expressions",
-					   ir_var_temporary);
-      ir->insert_before(op_var[i]);
-
-      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
-      assign = new(mem_ctx) ir_assignment(deref,
-					  expr->operands[i],
-					  NULL);
-      ir->insert_before(assign);
-   }
-
-   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
-							   1, 1);
-
-   /* OK, time to break down this vector operation. */
-   switch (expr->operation) {
-   case ir_unop_bit_not:
-   case ir_unop_logic_not:
-   case ir_unop_neg:
-   case ir_unop_abs:
-   case ir_unop_sign:
-   case ir_unop_rcp:
-   case ir_unop_rsq:
-   case ir_unop_sqrt:
-   case ir_unop_exp:
-   case ir_unop_log:
-   case ir_unop_exp2:
-   case ir_unop_log2:
-   case ir_unop_bitcast_i2f:
-   case ir_unop_bitcast_f2i:
-   case ir_unop_bitcast_f2u:
-   case ir_unop_bitcast_u2f:
-   case ir_unop_i2u:
-   case ir_unop_u2i:
-   case ir_unop_f2i:
-   case ir_unop_f2u:
-   case ir_unop_i2f:
-   case ir_unop_f2b:
-   case ir_unop_b2f:
-   case ir_unop_i2b:
-   case ir_unop_b2i:
-   case ir_unop_u2f:
-   case ir_unop_d2f:
-   case ir_unop_f2d:
-   case ir_unop_d2i:
-   case ir_unop_i2d:
-   case ir_unop_d2u:
-   case ir_unop_u2d:
-   case ir_unop_d2b:
-   case ir_unop_trunc:
-   case ir_unop_ceil:
-   case ir_unop_floor:
-   case ir_unop_fract:
-   case ir_unop_round_even:
-   case ir_unop_sin:
-   case ir_unop_cos:
-   case ir_unop_dFdx:
-   case ir_unop_dFdx_coarse:
-   case ir_unop_dFdx_fine:
-   case ir_unop_dFdy:
-   case ir_unop_dFdy_coarse:
-   case ir_unop_dFdy_fine:
-   case ir_unop_bitfield_reverse:
-   case ir_unop_bit_count:
-   case ir_unop_find_msb:
-   case ir_unop_find_lsb:
-   case ir_unop_saturate:
-   case ir_unop_subroutine_to_int:
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-
-	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
-						  element_type,
-						  op0,
-						  NULL));
-      }
-      break;
-
-   case ir_binop_add:
-   case ir_binop_sub:
-   case ir_binop_mul:
-   case ir_binop_imul_high:
-   case ir_binop_div:
-   case ir_binop_carry:
-   case ir_binop_borrow:
-   case ir_binop_mod:
-   case ir_binop_min:
-   case ir_binop_max:
-   case ir_binop_pow:
-   case ir_binop_lshift:
-   case ir_binop_rshift:
-   case ir_binop_bit_and:
-   case ir_binop_bit_xor:
-   case ir_binop_bit_or:
-   case ir_binop_logic_and:
-   case ir_binop_logic_xor:
-   case ir_binop_logic_or:
-   case ir_binop_less:
-   case ir_binop_greater:
-   case ir_binop_lequal:
-   case ir_binop_gequal:
-   case ir_binop_equal:
-   case ir_binop_nequal:
-   case ir_binop_ldexp:
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-	 ir_rvalue *op1 = get_element(op_var[1], i);
-
-	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
-						  element_type,
-						  op0,
-						  op1));
-      }
-      break;
-
-   case ir_binop_dot: {
-      ir_expression *last = NULL;
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-	 ir_rvalue *op1 = get_element(op_var[1], i);
-	 ir_expression *temp;
-
-	 temp = new(mem_ctx) ir_expression(ir_binop_mul,
-					   element_type,
-					   op0,
-					   op1);
-	 if (last) {
-	    last = new(mem_ctx) ir_expression(ir_binop_add,
-					      element_type,
-					      temp,
-					      last);
-	 } else {
-	    last = temp;
-	 }
-      }
-      assign(ir, 0, last);
-      break;
-   }
-
-   case ir_binop_all_equal:
-   case ir_binop_any_nequal: {
-      ir_expression *last = NULL;
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-	 ir_rvalue *op1 = get_element(op_var[1], i);
-	 ir_expression *temp;
-	 ir_expression_operation join;
-
-	 if (expr->operation == ir_binop_all_equal)
-	    join = ir_binop_logic_and;
-	 else
-	    join = ir_binop_logic_or;
-
-	 temp = new(mem_ctx) ir_expression(expr->operation,
-					   element_type,
-					   op0,
-					   op1);
-	 if (last) {
-	    last = new(mem_ctx) ir_expression(join,
-					      element_type,
-					      temp,
-					      last);
-	 } else {
-	    last = temp;
-	 }
-      }
-      assign(ir, 0, last);
-      break;
-   }
-   case ir_unop_noise:
-      unreachable("noise should have been broken down to function call");
-
-   case ir_binop_ubo_load:
-   case ir_unop_get_buffer_size:
-      unreachable("not yet supported");
-
-   case ir_triop_fma:
-   case ir_triop_lrp:
-   case ir_triop_csel:
-   case ir_triop_bitfield_extract:
-      for (i = 0; i < vector_elements; i++) {
-	 ir_rvalue *op0 = get_element(op_var[0], i);
-	 ir_rvalue *op1 = get_element(op_var[1], i);
-	 ir_rvalue *op2 = get_element(op_var[2], i);
-
-	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
-						  element_type,
-						  op0,
-						  op1,
-						  op2));
-      }
-      break;
-
-   case ir_quadop_bitfield_insert:
-      for (i = 0; i < vector_elements; i++) {
-         ir_rvalue *op0 = get_element(op_var[0], i);
-         ir_rvalue *op1 = get_element(op_var[1], i);
-         ir_rvalue *op2 = get_element(op_var[2], i);
-         ir_rvalue *op3 = get_element(op_var[3], i);
-
-         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
-                                                  element_type,
-                                                  op0,
-                                                  op1,
-                                                  op2,
-                                                  op3));
-      }
-      break;
-
-   case ir_unop_pack_snorm_2x16:
-   case ir_unop_pack_snorm_4x8:
-   case ir_unop_pack_unorm_2x16:
-   case ir_unop_pack_unorm_4x8:
-   case ir_unop_pack_half_2x16:
-   case ir_unop_unpack_snorm_2x16:
-   case ir_unop_unpack_snorm_4x8:
-   case ir_unop_unpack_unorm_2x16:
-   case ir_unop_unpack_unorm_4x8:
-   case ir_unop_unpack_half_2x16:
-   case ir_binop_vector_extract:
-   case ir_triop_vector_insert:
-   case ir_quadop_vector:
-   case ir_unop_ssbo_unsized_array_length:
-      unreachable("should have been lowered");
-
-   case ir_unop_interpolate_at_centroid:
-   case ir_binop_interpolate_at_offset:
-   case ir_binop_interpolate_at_sample:
-   case ir_unop_unpack_double_2x32:
-      unreachable("not reached: expression operates on scalars only");
-
-   case ir_unop_pack_double_2x32:
-      unreachable("not reached: to be lowered in NIR, should've been skipped");
-
-   case ir_unop_frexp_sig:
-   case ir_unop_frexp_exp:
-      unreachable("should have been lowered by lower_instructions");
-
-   case ir_unop_vote_any:
-   case ir_unop_vote_all:
-   case ir_unop_vote_eq:
-      unreachable("unsupported");
-   }
-
-   ir->remove();
-   this->progress = true;
-
-   return visit_continue;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index 1ad2369..0ee661b 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -156,9 +156,6 @@ process_glsl_ir(gl_shader_stage stage,
       progress = false;
 
       if (compiler->scalar_stage[shader->Stage]) {
-         if (shader->Stage == MESA_SHADER_VERTEX ||
-             shader->Stage == MESA_SHADER_FRAGMENT)
-            brw_do_channel_expressions(shader->ir);
          brw_do_vector_splitting(shader->ir);
       }
 
-- 
2.7.3



More information about the mesa-dev mailing list