[Mesa-dev] [PATCH 3/9] glsl, nir: Make ir_quadop_bitfield_insert a vectorized operation.

Mon Jan 11 14:48:36 PST 2016

From: Kenneth Graunke <kenneth at whitecape.org>

We would like to be able to combine

   result.x = bitfieldInsert(src0.x, src1.x, src2.x, src3.x);
   result.y = bitfieldInsert(src0.y, src1.y, src2.y, src3.y);
   result.z = bitfieldInsert(src0.z, src1.z, src2.z, src3.z);
   result.w = bitfieldInsert(src0.w, src1.w, src2.w, src3.w);

into a single ivec4 bitfieldInsert operation.  This should be possible
with most drivers.

This patch changes the offset and bits parameters from scalar ints
to ivecN or uvecN.  The type of all four operands will be the same,
for simplicity.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
Reviewed-by: Matt Turner <mattst88 at gmail.com>
---
[mattst88] v2:
	Use swizzle(expr, SWIZZLE_XXXX, type->vector_elements) instead of
	swizzle_for_size(expr, type->vector_elements). The latter does not
	provide the wanted operation of expanding a scalar.

	Expand arguments to bitfield_insert() in ldexp() lowering passes.

 src/glsl/builtin_functions.cpp      | 8 +++++++-
 src/glsl/ir.h                       | 1 -
 src/glsl/ir_constant_expression.cpp | 6 +++---
 src/glsl/ir_validate.cpp            | 5 +++--
 src/glsl/lower_instructions.cpp     | 8 ++++----
 src/glsl/nir/nir_opcodes.py         | 4 ++--
 6 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 602852a..38383bc 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -4902,13 +4902,19 @@ builtin_builder::_bitfieldExtract(const glsl_type *type)
 ir_function_signature *
 builtin_builder::_bitfieldInsert(const glsl_type *type)
 {
+   bool is_uint = type->base_type == GLSL_TYPE_UINT;
    ir_variable *base   = in_var(type, "base");
    ir_variable *insert = in_var(type, "insert");
    ir_variable *offset = in_var(glsl_type::int_type, "offset");
    ir_variable *bits   = in_var(glsl_type::int_type, "bits");
    MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits);
 
-   body.emit(ret(bitfield_insert(base, insert, offset, bits)));
+   operand cast_offset = is_uint ? i2u(offset) : operand(offset);
+   operand cast_bits = is_uint ? i2u(bits) : operand(bits);
+
+   body.emit(ret(bitfield_insert(base, insert,
+      swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements),
+      swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements))));
 
    return sig;
 }
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index a2eb508..9af2fc1 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1710,7 +1710,6 @@ public:
              operation == ir_triop_vector_insert ||
              operation == ir_quadop_vector ||
              /* TODO: these can't currently be vectorized */
-             operation == ir_quadop_bitfield_insert ||
              operation == ir_triop_bitfield_extract;
    }
 
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 38b6dd5..f5b5bd8 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1710,10 +1710,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
    }
 
    case ir_quadop_bitfield_insert: {
-      int offset = op[2]->value.i[0];
-      int bits = op[3]->value.i[0];
-
       for (unsigned c = 0; c < components; c++) {
+         int offset = op[2]->value.i[c];
+         int bits = op[3]->value.i[c];
+
          if (bits == 0)
             data.u[c] = op[0]->value.u[c];
          else if (offset < 0 || bits < 0)
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index a4d6182..fea9b76 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -647,10 +647,11 @@ ir_validate::visit_leave(ir_expression *ir)
       break;
 
    case ir_quadop_bitfield_insert:
+      assert(ir->type->is_integer());
       assert(ir->operands[0]->type == ir->type);
       assert(ir->operands[1]->type == ir->type);
-      assert(ir->operands[2]->type == glsl_type::int_type);
-      assert(ir->operands[3]->type == glsl_type::int_type);
+      assert(ir->operands[2]->type == ir->type);
+      assert(ir->operands[3]->type == ir->type);
       break;
 
    case ir_quadop_vector:
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index f70db87..d140be3 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -381,8 +381,8 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
 
    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
 
-   ir_constant *exp_shift = new(ir) ir_constant(23);
-   ir_constant *exp_width = new(ir) ir_constant(8);
+   ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
+   ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
 
    /* Temporary variables */
    ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
@@ -470,8 +470,8 @@ lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
 
    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
 
-   ir_constant *exp_shift = new(ir) ir_constant(20);
-   ir_constant *exp_width = new(ir) ir_constant(11);
+   ir_constant *exp_shift = new(ir) ir_constant(20, vec_elem);
+   ir_constant *exp_width = new(ir) ir_constant(11, vec_elem);
    ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
 
    /* Temporary variables */
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 398ae50..3780628 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -609,10 +609,10 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
           [tuint, tuint, tuint, tuint],
           "", const_expr)
 
-opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1],
+opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
        [tuint, tuint, tint, tint], "", """
 unsigned base = src0, insert = src1;
-int offset = src2.x, bits = src3.x;
+int offset = src2, bits = src3;
 if (bits == 0) {
    dst = 0;
 } else if (offset < 0 || bits < 0 || bits + offset > 32) {
-- 
2.4.9