[Mesa-dev] [PATCHv3 3/9] glsl, nir: Make ir_quadop_bitfield_insert a vectorized operation.

Tue Jan 12 08:14:48 PST 2016

On Mon, Jan 11, 2016 at 7:20 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> On Mon, Jan 11, 2016 at 7:12 PM, Matt Turner <mattst88 at gmail.com> wrote:
>> From: Kenneth Graunke <kenneth at whitecape.org>
>>
>> We would like to be able to combine
>>
>>    result.x = bitfieldInsert(src0.x, src1.x, src2.x, src3.x);
>>    result.y = bitfieldInsert(src0.y, src1.y, src2.y, src3.y);
>>    result.z = bitfieldInsert(src0.z, src1.z, src2.z, src3.z);
>>    result.w = bitfieldInsert(src0.w, src1.w, src2.w, src3.w);
>>
>> into a single ivec4 bitfieldInsert operation.  This should be possible
>> with most drivers.
>>
>> This patch changes the offset and bits parameters from scalar ints
>> to ivecN or uvecN.  The type of all four operands will be the same,
>> for simplicity.
>>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> Reviewed-by: Matt Turner <mattst88 at gmail.com>
>> ---
>> [mattst88 v3]: Make some constants unsigned in src/glsl/lower_packing_builtins.cpp
>>
>>  src/glsl/builtin_functions.cpp      |  8 +++++++-
>>  src/glsl/ir.h                       |  1 -
>>  src/glsl/ir_constant_expression.cpp |  6 +++---
>>  src/glsl/ir_validate.cpp            |  5 +++--
>>  src/glsl/lower_instructions.cpp     |  8 ++++----
>>  src/glsl/lower_packing_builtins.cpp | 10 +++++-----
>>  src/glsl/nir/nir_opcodes.py         |  4 ++--
>>  7 files changed, 24 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
>> index 602852a..38383bc 100644
>> --- a/src/glsl/builtin_functions.cpp
>> +++ b/src/glsl/builtin_functions.cpp
>> @@ -4902,13 +4902,19 @@ builtin_builder::_bitfieldExtract(const glsl_type *type)
>>  ir_function_signature *
>>  builtin_builder::_bitfieldInsert(const glsl_type *type)
>>  {
>> +   bool is_uint = type->base_type == GLSL_TYPE_UINT;
>>     ir_variable *base   = in_var(type, "base");
>>     ir_variable *insert = in_var(type, "insert");
>>     ir_variable *offset = in_var(glsl_type::int_type, "offset");
>>     ir_variable *bits   = in_var(glsl_type::int_type, "bits");
>>     MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits);
>>
>> -   body.emit(ret(bitfield_insert(base, insert, offset, bits)));
>> +   operand cast_offset = is_uint ? i2u(offset) : operand(offset);
>> +   operand cast_bits = is_uint ? i2u(bits) : operand(bits);
>> +
>> +   body.emit(ret(bitfield_insert(base, insert,
>> +      swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements),
>> +      swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements))));
>>
>>     return sig;
>>  }
>> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
>> index a2eb508..9af2fc1 100644
>> --- a/src/glsl/ir.h
>> +++ b/src/glsl/ir.h
>> @@ -1710,7 +1710,6 @@ public:
>>               operation == ir_triop_vector_insert ||
>>               operation == ir_quadop_vector ||
>>               /* TODO: these can't currently be vectorized */
>> -             operation == ir_quadop_bitfield_insert ||
>>               operation == ir_triop_bitfield_extract;
>>     }
>>
>> diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
>> index 38b6dd5..f5b5bd8 100644
>> --- a/src/glsl/ir_constant_expression.cpp
>> +++ b/src/glsl/ir_constant_expression.cpp
>> @@ -1710,10 +1710,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
>>     }
>>
>>     case ir_quadop_bitfield_insert: {
>> -      int offset = op[2]->value.i[0];
>> -      int bits = op[3]->value.i[0];
>> -
>>        for (unsigned c = 0; c < components; c++) {
>> +         int offset = op[2]->value.i[c];
>> +         int bits = op[3]->value.i[c];
>> +
>>           if (bits == 0)
>>              data.u[c] = op[0]->value.u[c];
>>           else if (offset < 0 || bits < 0)
>> diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
>> index a4d6182..fea9b76 100644
>> --- a/src/glsl/ir_validate.cpp
>> +++ b/src/glsl/ir_validate.cpp
>> @@ -647,10 +647,11 @@ ir_validate::visit_leave(ir_expression *ir)
>>        break;
>>
>>     case ir_quadop_bitfield_insert:
>> +      assert(ir->type->is_integer());
>>        assert(ir->operands[0]->type == ir->type);
>>        assert(ir->operands[1]->type == ir->type);
>> -      assert(ir->operands[2]->type == glsl_type::int_type);
>> -      assert(ir->operands[3]->type == glsl_type::int_type);
>> +      assert(ir->operands[2]->type == ir->type);
>> +      assert(ir->operands[3]->type == ir->type);
>>        break;
>>
>>     case ir_quadop_vector:
>> diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
>> index f70db87..d140be3 100644
>> --- a/src/glsl/lower_instructions.cpp
>> +++ b/src/glsl/lower_instructions.cpp
>> @@ -381,8 +381,8 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
>>
>>     ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
>>
>> -   ir_constant *exp_shift = new(ir) ir_constant(23);
>> -   ir_constant *exp_width = new(ir) ir_constant(8);
>> +   ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
>> +   ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
>>
>>     /* Temporary variables */
>>     ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
>> @@ -470,8 +470,8 @@ lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
>>
>>     ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
>>
>> -   ir_constant *exp_shift = new(ir) ir_constant(20);
>> -   ir_constant *exp_width = new(ir) ir_constant(11);
>> +   ir_constant *exp_shift = new(ir) ir_constant(20, vec_elem);
>> +   ir_constant *exp_width = new(ir) ir_constant(11, vec_elem);
>>     ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
>>
>>     /* Temporary variables */
>> diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp
>> index c8bf68b..19eeaa3 100644
>> --- a/src/glsl/lower_packing_builtins.cpp
>> +++ b/src/glsl/lower_packing_builtins.cpp
>> @@ -230,8 +230,8 @@ private:
>>        if (op_mask & LOWER_PACK_USE_BFI) {
>>           return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
>>                                  swizzle_y(u),
>> -                                constant(16),
>> -                                constant(16));
>> +                                constant(16u),
>> +                                constant(16u));
>>        }
>>
>>        /* return (u.y << 16) | (u.x & 0xffff); */
>> @@ -261,9 +261,9 @@ private:
>>           return bitfield_insert(bitfield_insert(
>>                                     bitfield_insert(
>>                                        bit_and(swizzle_x(u), constant(0xffu)),
>> -                                      swizzle_y(u), constant(8), constant(8)),
>> -                                   swizzle_z(u), constant(16), constant(8)),
>> -                                swizzle_w(u), constant(24), constant(8));
>> +                                      swizzle_y(u), constant(8u), constant(8u)),
>> +                                   swizzle_z(u), constant(16u), constant(8u)),
>> +                                swizzle_w(u), constant(24u), constant(8u));
>>        }
>>
>>        /* uvec4 u = UVEC4_RVAL & 0xff */
>> diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
>> index 398ae50..3780628 100644
>> --- a/src/glsl/nir/nir_opcodes.py
>> +++ b/src/glsl/nir/nir_opcodes.py
>> @@ -609,10 +609,10 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
>>            [tuint, tuint, tuint, tuint],
>>            "", const_expr)
>>
>> -opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1],
>> +opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
>>         [tuint, tuint, tint, tint], "", """
>>  unsigned base = src0, insert = src1;
>> -int offset = src2.x, bits = src3.x;
>> +int offset = src2, bits = src3;
>>  if (bits == 0) {
>>     dst = 0;
>>  } else if (offset < 0 || bits < 0 || bits + offset > 32) {
>
> If the sole purpose of this is documentation, I guess it's good
> enough... but really it's "for each channel" sort of thing now.
> Otherwise this patch is
>
> Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>

It's not just for documentation -- it's used to generate the constant
folding code for bitfield_insert. But the constant folding stuff is
smart enough to see that the size above is set to 0 (which is changed
in this patch as well), which means "per-component," and it will loop
over each component and set src2 and src3 (as well as src0 and src1)
to the current component.

>
>> --
>> 2.4.9
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev