[Mesa-dev] [PATCH 2/9] glsl: Delete the ir_binop_bfm and ir_triop_bfi opcodes.

Tue Jan 12 02:22:47 PST 2016

Looks good to me, FWIW I built radeonsi and llvmpipe with this change
just in case and I saw no issues.

Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>

On Mon, 2016-01-11 at 14:48 -0800, Matt Turner wrote:
> From: Kenneth Graunke <kenneth at whitecape.org>
> 
> TGSI doesn't use these - it just translates ir_quadop_bitfield_insert
> directly.  NIR can handle ir_quadop_bitfield_insert as well.
> 
> These opcodes were only used for i965, and with Jason's recent patches,
> we can do this lowering in NIR (which also gains us SPIR-V handling).
> So there's not much point to retaining this GLSL IR lowering code.
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> Reviewed-by: Matt Turner <mattst88 at gmail.com>
> ---
> [mattst88] v2:
> 	Remove more uses of the opcodes, in st_glsl_to_tgsi.cpp and ir_to_mesa.cpp.
> 	Update unreachable() messages in i965
> 	Delete BITFIELD_INSERT_TO_BFM_BFI and uses, renumber subsequent #defines
> 
>  src/glsl/ir.cpp                                    |  4 --
>  src/glsl/ir.h                                      | 22 +---------
>  src/glsl/ir_constant_expression.cpp                | 17 --------
>  src/glsl/ir_optimization.h                         | 13 +++---
>  src/glsl/ir_validate.cpp                           | 12 ------
>  src/glsl/lower_instructions.cpp                    | 47 ----------------------
>  src/glsl/nir/glsl_to_nir.cpp                       |  4 --
>  .../dri/i965/brw_fs_channel_expressions.cpp        | 30 ++++----------
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp           |  3 +-
>  src/mesa/drivers/dri/i965/brw_link.cpp             |  2 -
>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp         |  3 +-
>  src/mesa/program/ir_to_mesa.cpp                    |  2 -
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp         |  2 -
>  13 files changed, 16 insertions(+), 145 deletions(-)
> 
> diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
> index d82bccd..b424edd 100644
> --- a/src/glsl/ir.cpp
> +++ b/src/glsl/ir.cpp
> @@ -431,7 +431,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
>     case ir_binop_borrow:
>     case ir_binop_lshift:
>     case ir_binop_rshift:
> -   case ir_binop_bfm:
>     case ir_binop_ldexp:
>     case ir_binop_interpolate_at_offset:
>     case ir_binop_interpolate_at_sample:
> @@ -468,7 +467,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1,
>        this->type = op0->type;
>        break;
>  
> -   case ir_triop_bfi:
>     case ir_triop_csel:
>        this->type = op1->type;
>        break;
> @@ -602,7 +600,6 @@ static const char *const operator_strs[] = {
>     "max",
>     "pow",
>     "packHalf2x16_split",
> -   "bfm",
>     "ubo_load",
>     "ldexp",
>     "vector_extract",
> @@ -611,7 +608,6 @@ static const char *const operator_strs[] = {
>     "fma",
>     "lrp",
>     "csel",
> -   "bfi",
>     "bitfield_extract",
>     "vector_insert",
>     "bitfield_insert",
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index 93e0734..a2eb508 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -1551,15 +1551,6 @@ enum ir_expression_operation {
>     /*@}*/
>  
>     /**
> -    * \name First half of a lowered bitfieldInsert() operation.
> -    *
> -    * \see lower_instructions::bitfield_insert_to_bfm_bfi
> -    */
> -   /*@{*/
> -   ir_binop_bfm,
> -   /*@}*/
> -
> -   /**
>      * Load a value the size of a given GLSL type from a uniform block.
>      *
>      * operand0 is the ir_constant uniform block index in the linked shader.
> @@ -1624,15 +1615,6 @@ enum ir_expression_operation {
>     ir_triop_csel,
>     /*@}*/
>  
> -   /**
> -    * \name Second half of a lowered bitfieldInsert() operation.
> -    *
> -    * \see lower_instructions::bitfield_insert_to_bfm_bfi
> -    */
> -   /*@{*/
> -   ir_triop_bfi,
> -   /*@}*/
> -
>     ir_triop_bitfield_extract,
>  
>     /**
> @@ -1729,9 +1711,7 @@ public:
>               operation == ir_quadop_vector ||
>               /* TODO: these can't currently be vectorized */
>               operation == ir_quadop_bitfield_insert ||
> -             operation == ir_triop_bitfield_extract ||
> -             operation == ir_triop_bfi ||
> -             operation == ir_binop_bfm;
> +             operation == ir_triop_bitfield_extract;
>     }
>  
>     /**
> diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
> index f02e959..38b6dd5 100644
> --- a/src/glsl/ir_constant_expression.cpp
> +++ b/src/glsl/ir_constant_expression.cpp
> @@ -1616,23 +1616,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
>        break;
>     }
>  
> -   case ir_binop_bfm: {
> -      int bits = op[0]->value.i[0];
> -      int offset = op[1]->value.i[0];
> -
> -      for (unsigned c = 0; c < components; c++) {
> -         if (bits == 0)
> -            data.u[c] = op[0]->value.u[c];
> -         else if (offset < 0 || bits < 0)
> -            data.u[c] = 0; /* Undefined for bitfieldInsert, per spec. */
> -         else if (offset + bits > 32)
> -            data.u[c] = 0; /* Undefined for bitfieldInsert, per spec. */
> -         else
> -            data.u[c] = ((1 << bits) - 1) << offset;
> -      }
> -      break;
> -   }
> -
>     case ir_binop_ldexp:
>        for (unsigned c = 0; c < components; c++) {
>           if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) {
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index dabd80a..be86f54 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -36,13 +36,12 @@
>  #define LOG_TO_LOG2        0x10
>  #define MOD_TO_FLOOR       0x20
>  #define INT_DIV_TO_MUL_RCP 0x40
> -#define BITFIELD_INSERT_TO_BFM_BFI 0x80
> -#define LDEXP_TO_ARITH     0x100
> -#define CARRY_TO_ARITH     0x200
> -#define BORROW_TO_ARITH    0x400
> -#define SAT_TO_CLAMP       0x800
> -#define DOPS_TO_DFRAC      0x1000
> -#define DFREXP_DLDEXP_TO_ARITH    0x2000
> +#define LDEXP_TO_ARITH     0x80
> +#define CARRY_TO_ARITH     0x100
> +#define BORROW_TO_ARITH    0x200
> +#define SAT_TO_CLAMP       0x400
> +#define DOPS_TO_DFRAC      0x800
> +#define DFREXP_DLDEXP_TO_ARITH    0x1000
>  
>  /**
>   * \see class lower_packing_builtins_visitor
> diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
> index dcc079c..a4d6182 100644
> --- a/src/glsl/ir_validate.cpp
> +++ b/src/glsl/ir_validate.cpp
> @@ -573,12 +573,6 @@ ir_validate::visit_leave(ir_expression *ir)
>        assert(ir->operands[1]->type == glsl_type::float_type);
>        break;
>  
> -   case ir_binop_bfm:
> -      assert(ir->type->is_integer());
> -      assert(ir->operands[0]->type->is_integer());
> -      assert(ir->operands[1]->type->is_integer());
> -      break;
> -
>     case ir_binop_ubo_load:
>        assert(ir->operands[0]->type == glsl_type::uint_type);
>  
> @@ -637,12 +631,6 @@ ir_validate::visit_leave(ir_expression *ir)
>        assert(ir->type == ir->operands[2]->type);
>        break;
>  
> -   case ir_triop_bfi:
> -      assert(ir->operands[0]->type->is_integer());
> -      assert(ir->operands[1]->type == ir->operands[2]->type);
> -      assert(ir->operands[1]->type == ir->type);
> -      break;
> -
>     case ir_triop_bitfield_extract:
>        assert(ir->operands[0]->type == ir->type);
>        assert(ir->operands[1]->type == glsl_type::int_type);
> diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
> index 845cfff..f70db87 100644
> --- a/src/glsl/lower_instructions.cpp
> +++ b/src/glsl/lower_instructions.cpp
> @@ -39,7 +39,6 @@
>   * - MOD_TO_FLOOR
>   * - LDEXP_TO_ARITH
>   * - DFREXP_TO_ARITH
> - * - BITFIELD_INSERT_TO_BFM_BFI
>   * - CARRY_TO_ARITH
>   * - BORROW_TO_ARITH
>   * - SAT_TO_CLAMP
> @@ -99,14 +98,6 @@
>   * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to
>   * arithmetic and bit ops for double arguments.
>   *
> - * BITFIELD_INSERT_TO_BFM_BFI:
> - * ---------------------------
> - * Breaks ir_quadop_bitfield_insert into ir_binop_bfm (bitfield mask) and
> - * ir_triop_bfi (bitfield insert).
> - *
> - * Many GPUs implement the bitfieldInsert() built-in from ARB_gpu_shader_5
> - * with a pair of instructions.
> - *
>   * CARRY_TO_ARITH:
>   * ---------------
>   * Converts ir_carry into (x + y) < x.
> @@ -154,7 +145,6 @@ private:
>     void exp_to_exp2(ir_expression *);
>     void pow_to_exp2(ir_expression *);
>     void log_to_log2(ir_expression *);
> -   void bitfield_insert_to_bfm_bfi(ir_expression *);
>     void ldexp_to_arith(ir_expression *);
>     void dldexp_to_arith(ir_expression *);
>     void dfrexp_sig_to_arith(ir_expression *);
> @@ -348,29 +338,6 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
>  }
>  
>  void
> -lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
> -{
> -   /* Translates
> -    *    ir_quadop_bitfield_insert base insert offset bits
> -    * into
> -    *    ir_triop_bfi (ir_binop_bfm bits offset) insert base
> -    */
> -
> -   ir_rvalue *base_expr = ir->operands[0];
> -
> -   ir->operation = ir_triop_bfi;
> -   ir->operands[0] = new(ir) ir_expression(ir_binop_bfm,
> -                                           ir->type->get_base_type(),
> -                                           ir->operands[3],
> -                                           ir->operands[2]);
> -   /* ir->operands[1] is still the value to insert. */
> -   ir->operands[2] = base_expr;
> -   ir->operands[3] = NULL;
> -
> -   this->progress = true;
> -}
> -
> -void
>  lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
>  {
>     /* Translates
> @@ -482,12 +449,6 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
>                                       exp_shift_clone, exp_width);
>     ir->operands[1] = NULL;
>  
> -   /* Don't generate new IR that would need to be lowered in an additional
> -    * pass.
> -    */
> -   if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
> -      bitfield_insert_to_bfm_bfi(ir->operands[0]->as_expression());
> -
>     this->progress = true;
>  }
>  
> @@ -602,9 +563,6 @@ lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
>              exp_shift->clone(ir, NULL),
>              exp_width->clone(ir, NULL));
>  
> -      if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
> -         bitfield_insert_to_bfm_bfi(bfi);
> -
>        i.insert_before(assign(unpacked, bfi, WRITEMASK_Y));
>  
>        results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
> @@ -1039,11 +997,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
>  	 pow_to_exp2(ir);
>        break;
>  
> -   case ir_quadop_bitfield_insert:
> -      if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
> -         bitfield_insert_to_bfm_bfi(ir);
> -      break;
> -
>     case ir_binop_ldexp:
>        if (lowering(LDEXP_TO_ARITH) && ir->type->is_float())
>           ldexp_to_arith(ir);
> diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
> index 12efb44..c7399eb 100644
> --- a/src/glsl/nir/glsl_to_nir.cpp
> +++ b/src/glsl/nir/glsl_to_nir.cpp
> @@ -1721,7 +1721,6 @@ nir_visitor::visit(ir_expression *ir)
>     case ir_binop_pack_half_2x16_split:
>           result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]);
>           break;
> -   case ir_binop_bfm:   result = nir_bfm(&b, srcs[0], srcs[1]);   break;
>     case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
>     case ir_triop_fma:
>        result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
> @@ -1735,9 +1734,6 @@ nir_visitor::visit(ir_expression *ir)
>        else
>           result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]);
>        break;
> -   case ir_triop_bfi:
> -      result = nir_bfi(&b, srcs[0], srcs[1], srcs[2]);
> -      break;
>     case ir_triop_bitfield_extract:
>        result = (out_type == GLSL_TYPE_INT) ?
>           nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) :
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> index 78a8240..21f0b70 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
> @@ -143,7 +143,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
>     ir_expression *expr = ir->rhs->as_expression();
>     bool found_vector = false;
>     unsigned int i, vector_elements = 1;
> -   ir_variable *op_var[3];
> +   ir_variable *op_var[4];
>  
>     if (!expr)
>        return visit_continue;
> @@ -345,20 +345,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
>     case ir_unop_noise:
>        unreachable("noise should have been broken down to function call");
>  
> -   case ir_binop_bfm: {
> -      /* Does not need to be scalarized, since its result will be identical
> -       * for all channels.
> -       */
> -      ir_rvalue *op0 = get_element(op_var[0], 0);
> -      ir_rvalue *op1 = get_element(op_var[1], 0);
> -
> -      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
> -                                               element_type,
> -                                               op0,
> -                                               op1));
> -      break;
> -   }
> -
>     case ir_binop_ubo_load:
>     case ir_unop_get_buffer_size:
>        unreachable("not yet supported");
> @@ -380,22 +366,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
>        }
>        break;
>  
> -   case ir_triop_bfi: {
> -      /* Only a single BFM is needed for multiple BFIs. */
> -      ir_rvalue *op0 = get_element(op_var[0], 0);
> -
> +   case ir_quadop_bitfield_insert:
>        for (i = 0; i < vector_elements; i++) {
> +         ir_rvalue *op0 = get_element(op_var[0], i);
>           ir_rvalue *op1 = get_element(op_var[1], i);
>           ir_rvalue *op2 = get_element(op_var[2], i);
> +         ir_rvalue *op3 = get_element(op_var[3], i);
>  
>           assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
>                                                    element_type,
> -                                                  op0->clone(mem_ctx, NULL),
> +                                                  op0,
>                                                    op1,
> -                                                  op2));
> +                                                  op2,
> +                                                  op3));
>        }
>        break;
> -   }
>  
>     case ir_unop_pack_snorm_2x16:
>     case ir_unop_pack_snorm_4x8:
> @@ -410,7 +395,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
>     case ir_binop_ldexp:
>     case ir_binop_vector_extract:
>     case ir_triop_vector_insert:
> -   case ir_quadop_bitfield_insert:
>     case ir_quadop_vector:
>     case ir_unop_ssbo_unsized_array_length:
>        unreachable("should have been lowered");
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index ad347fc..8740925 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1037,8 +1037,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
>        break;
>  
>     case nir_op_bitfield_insert:
> -      unreachable("not reached: should be handled by "
> -                  "lower_instructions::bitfield_insert_to_bfm_bfi");
> +      unreachable("not reached: should have been lowered");
>  
>     case nir_op_ishl:
>        bld.SHL(result, op[0], op[1]);
> diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
> index 766c57f..234afd5 100644
> --- a/src/mesa/drivers/dri/i965/brw_link.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_link.cpp
> @@ -126,14 +126,12 @@ process_glsl_ir(gl_shader_stage stage,
>      */
>     brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
>     do_mat_op_to_vec(shader->ir);
> -   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
>     lower_instructions(shader->ir,
>                        MOD_TO_FLOOR |
>                        DIV_TO_MUL_RCP |
>                        SUB_TO_ADD_NEG |
>                        EXP_TO_EXP2 |
>                        LOG_TO_LOG2 |
> -                      bitfield_insert |
>                        LDEXP_TO_ARITH |
>                        CARRY_TO_ARITH |
>                        BORROW_TO_ARITH);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> index a3bdbc3..ecca166 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> @@ -1405,8 +1405,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>        break;
>  
>     case nir_op_bitfield_insert:
> -      unreachable("not reached: should be handled by "
> -                  "lower_instructions::bitfield_insert_to_bfm_bfi");
> +      unreachable("not reached: should have been lowered");
>  
>     case nir_op_fsign:
>        /* AND(val, 0x80000000) gives the sign bit.
> diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
> index 852655d..9cde28d 100644
> --- a/src/mesa/program/ir_to_mesa.cpp
> +++ b/src/mesa/program/ir_to_mesa.cpp
> @@ -1303,9 +1303,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
>        break;
>  
>     case ir_binop_vector_extract:
> -   case ir_binop_bfm:
>     case ir_triop_fma:
> -   case ir_triop_bfi:
>     case ir_triop_bitfield_extract:
>     case ir_triop_vector_insert:
>     case ir_quadop_bitfield_insert:
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 27a0a4f..d424e3b 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -2183,8 +2183,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>     case ir_unop_unpack_unorm_4x8:
>  
>     case ir_binop_pack_half_2x16_split:
> -   case ir_binop_bfm:
> -   case ir_triop_bfi:
>     case ir_quadop_vector:
>     case ir_binop_vector_extract:
>     case ir_triop_vector_insert: