[Mesa-dev] [PATCH 06/14] nir: handle different bit sizes when constant folding

Tue Mar 15 00:05:10 UTC 2016

On Mon, Mar 14, 2016 at 7:48 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>
>
> On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez
> <siglesias at igalia.com> wrote:
>>
>> From: Connor Abbott <connor.w.abbott at intel.com>
>>
>> v2: Use the bit-size information from the opcode information if defined
>> (Iago)
>>
>> Signed-off-by: Iago Toral Quiroga <itoral at igalia.com>
>>
>> FIXME: This should be squashed into the previous commit so we don't break
>> the build. The break happens because the python script that generates the
>> constant folding pass does not know how to handle the sized types
>> introduced
>> by the previous commit until this patch, so it ends up generating code
>> with
>> invalid types. Keep it separated for review purposes.
>> ---
>>  src/compiler/nir/nir_constant_expressions.h  |   2 +-
>>  src/compiler/nir/nir_constant_expressions.py | 246
>> +++++++++++++++++----------
>>  src/compiler/nir/nir_opt_constant_folding.c  |  24 ++-
>>  3 files changed, 182 insertions(+), 90 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_constant_expressions.h
>> b/src/compiler/nir/nir_constant_expressions.h
>> index 97997f2..201f278 100644
>> --- a/src/compiler/nir/nir_constant_expressions.h
>> +++ b/src/compiler/nir/nir_constant_expressions.h
>> @@ -28,4 +28,4 @@
>>  #include "nir.h"
>>
>>  nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
>> -                                      nir_const_value *src);
>> +                                      unsigned bit_size, nir_const_value
>> *src);
>> diff --git a/src/compiler/nir/nir_constant_expressions.py
>> b/src/compiler/nir/nir_constant_expressions.py
>> index 32784f6..972d281 100644
>> --- a/src/compiler/nir/nir_constant_expressions.py
>> +++ b/src/compiler/nir/nir_constant_expressions.py
>> @@ -1,4 +1,43 @@
>>  #! /usr/bin/python2
>> +
>> +def type_has_size(type_):
>> +    return type_[-1:].isdigit()
>> +
>> +def type_sizes(type_):
>> +    if type_.endswith("8"):
>> +        return [8]
>> +    elif type_.endswith("16"):
>> +        return [16]
>> +    elif type_.endswith("32"):
>> +        return [32]
>> +    elif type_.endswith("64"):
>> +        return [64]
>> +    else:
>> +        return [32, 64]
>> +
>> +def type_add_size(type_, size):
>> +    if type_has_size(type_):
>> +        return type_
>> +    return type_ + str(size)
>> +
>> +def get_const_field(type_):
>> +    if type_ == "int32":
>> +        return "i"
>> +    if type_ == "uint32":
>> +        return "u"
>> +    if type_ == "int64":
>> +        return "l"
>> +    if type_ == "uint64":
>> +        return "ul"
>> +    if type_ == "bool32":
>> +        return "b"
>> +    if type_ == "float32":
>> +        return "f"
>> +    if type_ == "float64":
>> +        return "d"
>> +    raise Exception(str(type_))
>> +    assert(0)
>> +
>>  template = """\
>>  /*
>>   * Copyright (C) 2014 Intel Corporation
>> @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u)
>>  }
>>
>>  /* Some typed vector structures to make things like src0.y work */
>> -% for type in ["float", "int", "uint", "bool"]:
>> -struct ${type}_vec {
>> -   ${type} x;
>> -   ${type} y;
>> -   ${type} z;
>> -   ${type} w;
>> +typedef float float32_t;
>> +typedef double float64_t;
>> +typedef bool bool32_t;
>> +% for type in ["float", "int", "uint"]:
>> +% for width in [32, 64]:
>> +struct ${type}${width}_vec {
>> +   ${type}${width}_t x;
>> +   ${type}${width}_t y;
>> +   ${type}${width}_t z;
>> +   ${type}${width}_t w;
>>  };
>>  % endfor
>> +% endfor
>> +
>> +struct bool32_vec {
>> +    bool x;
>> +    bool y;
>> +    bool z;
>> +    bool w;
>> +};
>>
>>  % for name, op in sorted(opcodes.iteritems()):
>>  static nir_const_value
>> -evaluate_${name}(unsigned num_components, nir_const_value *_src)
>> +evaluate_${name}(unsigned num_components, unsigned bit_size,
>> +                 nir_const_value *_src)
>>  {
>>     nir_const_value _dst_val = { { {0, 0, 0, 0} } };
>>
>> -   ## For each non-per-component input, create a variable srcN that
>> -   ## contains x, y, z, and w elements which are filled in with the
>> -   ## appropriately-typed values.
>> -   % for j in range(op.num_inputs):
>> -      % if op.input_sizes[j] == 0:
>> -         <% continue %>
>> -      % elif "src" + str(j) not in op.const_expr:
>> -         ## Avoid unused variable warnings
>> -         <% continue %>
>> -      %endif
>> -
>> -      struct ${op.input_types[j]}_vec src${j} = {
>> -      % for k in range(op.input_sizes[j]):
>> -         % if op.input_types[j] == "bool":
>> -            _src[${j}].u[${k}] != 0,
>> -         % else:
>> -            _src[${j}].${op.input_types[j][:1]}[${k}],
>> -         % endif
>> -      % endfor
>> -      };
>> -   % endfor
>> +   switch (bit_size) {
>> +   % for bit_size in [32, 64]:
>> +   case ${bit_size}: {
>> +      <%
>> +      output_type = type_add_size(op.output_type, bit_size)
>> +      input_types = [type_add_size(type_, bit_size) for type_ in
>> op.input_types]
>> +      %>
>> +
>> +      ## For each non-per-component input, create a variable srcN that
>> +      ## contains x, y, z, and w elements which are filled in with the
>> +      ## appropriately-typed values.
>> +      % for j in range(op.num_inputs):
>> +         % if op.input_sizes[j] == 0:
>> +            <% continue %>
>> +         % elif "src" + str(j) not in op.const_expr:
>> +            ## Avoid unused variable warnings
>> +            <% continue %>
>> +         %endif
>>
>> -   % if op.output_size == 0:
>> -      ## For per-component instructions, we need to iterate over the
>> -      ## components and apply the constant expression one component
>> -      ## at a time.
>> -      for (unsigned _i = 0; _i < num_components; _i++) {
>> -         ## For each per-component input, create a variable srcN that
>> -         ## contains the value of the current (_i'th) component.
>> -         % for j in range(op.num_inputs):
>> -            % if op.input_sizes[j] != 0:
>> -               <% continue %>
>> -            % elif "src" + str(j) not in op.const_expr:
>> -               ## Avoid unused variable warnings
>> -               <% continue %>
>> -            % elif op.input_types[j] == "bool":
>> -               bool src${j} = _src[${j}].u[_i] != 0;
>> +         struct ${input_types[j]}_vec src${j} = {
>> +         % for k in range(op.input_sizes[j]):
>> +            % if input_types[j] == "bool32":
>> +               _src[${j}].u[${k}] != 0,
>>              % else:
>> -               ${op.input_types[j]} src${j} =
>> _src[${j}].${op.input_types[j][:1]}[_i];
>> +               _src[${j}].${get_const_field(input_types[j])}[${k}],
>>              % endif
>>           % endfor
>> +         };
>> +      % endfor
>> +
>> +      % if op.output_size == 0:
>> +         ## For per-component instructions, we need to iterate over the
>> +         ## components and apply the constant expression one component
>> +         ## at a time.
>> +         for (unsigned _i = 0; _i < num_components; _i++) {
>> +            ## For each per-component input, create a variable srcN that
>> +            ## contains the value of the current (_i'th) component.
>> +            % for j in range(op.num_inputs):
>> +               % if op.input_sizes[j] != 0:
>> +                  <% continue %>
>> +               % elif "src" + str(j) not in op.const_expr:
>> +                  ## Avoid unused variable warnings
>> +                  <% continue %>
>> +               % elif input_types[j] == "bool32":
>> +                  bool src${j} = _src[${j}].u[_i] != 0;
>> +               % else:
>> +                  ${input_types[j]}_t src${j} =
>> +                     _src[${j}].${get_const_field(input_types[j])}[_i];
>> +               % endif
>> +            % endfor
>> +
>> +            ## Create an appropriately-typed variable dst and assign the
>> +            ## result of the const_expr to it.  If const_expr already
>> contains
>> +            ## writes to dst, just include const_expr directly.
>> +            % if "dst" in op.const_expr:
>> +               ${output_type}_t dst;
>> +               ${op.const_expr}
>> +            % else:
>> +               ${output_type}_t dst = ${op.const_expr};
>> +            % endif
>> +
>> +            ## Store the current component of the actual destination to
>> the
>> +            ## value of dst.
>> +            % if output_type == "bool32":
>> +               ## Sanitize the C value to a proper NIR bool
>> +               _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
>> +            % else:
>> +               _dst_val.${get_const_field(output_type)}[_i] = dst;
>> +            % endif
>> +         }
>> +      % else:
>> +         ## In the non-per-component case, create a struct dst with
>> +         ## appropriately-typed elements x, y, z, and w and assign the
>> result
>> +         ## of the const_expr to all components of dst, or include the
>> +         ## const_expr directly if it writes to dst already.
>> +         struct ${output_type}_vec dst;
>>
>> -         ## Create an appropriately-typed variable dst and assign the
>> -         ## result of the const_expr to it.  If const_expr already
>> contains
>> -         ## writes to dst, just include const_expr directly.
>>           % if "dst" in op.const_expr:
>> -            ${op.output_type} dst;
>>              ${op.const_expr}
>>           % else:
>> -            ${op.output_type} dst = ${op.const_expr};
>> +            ## Splat the value to all components.  This way expressions
>> which
>> +            ## write the same value to all components don't need to
>> explicitly
>> +            ## write to dest.  One such example is fnoise which has a
>> +            ## const_expr of 0.0f.
>> +            dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
>>           % endif
>>
>> -         ## Store the current component of the actual destination to the
>> -         ## value of dst.
>> -         % if op.output_type == "bool":
>> -            ## Sanitize the C value to a proper NIR bool
>> -            _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
>> -         % else:
>> -            _dst_val.${op.output_type[:1]}[_i] = dst;
>> -         % endif
>> -      }
>> -   % else:
>> -      ## In the non-per-component case, create a struct dst with
>> -      ## appropriately-typed elements x, y, z, and w and assign the
>> result
>> -      ## of the const_expr to all components of dst, or include the
>> -      ## const_expr directly if it writes to dst already.
>> -      struct ${op.output_type}_vec dst;
>> -
>> -      % if "dst" in op.const_expr:
>> -         ${op.const_expr}
>> -      % else:
>> -         ## Splat the value to all components.  This way expressions
>> which
>> -         ## write the same value to all components don't need to
>> explicitly
>> -         ## write to dest.  One such example is fnoise which has a
>> -         ## const_expr of 0.0f.
>> -         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
>> +         ## For each component in the destination, copy the value of dst
>> to
>> +         ## the actual destination.
>> +         % for k in range(op.output_size):
>> +            % if output_type == "bool32":
>> +               ## Sanitize the C value to a proper NIR bool
>> +               _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE :
>> NIR_FALSE;
>> +            % else:
>> +               _dst_val.${get_const_field(output_type)}[${k}] =
>> dst.${"xyzw"[k]};
>> +            % endif
>> +         % endfor
>>        % endif
>>
>> -      ## For each component in the destination, copy the value of dst to
>> -      ## the actual destination.
>> -      % for k in range(op.output_size):
>> -         % if op.output_type == "bool":
>> -            ## Sanitize the C value to a proper NIR bool
>> -            _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
>> -         % else:
>> -            _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
>> -         % endif
>> -      % endfor
>> -   % endif
>> +      break;
>> +   }
>> +   % endfor
>> +
>> +   default:
>> +      unreachable("unknown bit width");
>> +   }
>>
>>     return _dst_val;
>>  }
>> @@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components,
>> nir_const_value *_src)
>>
>>  nir_const_value
>>  nir_eval_const_opcode(nir_op op, unsigned num_components,
>> -                      nir_const_value *src)
>> +                      unsigned bit_width, nir_const_value *src)
>>  {
>>     switch (op) {
>>  % for name in sorted(opcodes.iterkeys()):
>>     case nir_op_${name}: {
>> -      return evaluate_${name}(num_components, src);
>> +      return evaluate_${name}(num_components, bit_width, src);
>>        break;
>>     }
>>  % endfor
>> @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned
>> num_components,
>>  from nir_opcodes import opcodes
>>  from mako.template import Template
>>
>> -print Template(template).render(opcodes=opcodes)
>> +print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
>> +                                type_has_size=type_has_size,
>> +                                type_add_size=type_add_size,
>> +                                get_const_field=get_const_field)
>> diff --git a/src/compiler/nir/nir_opt_constant_folding.c
>> b/src/compiler/nir/nir_opt_constant_folding.c
>> index 04876a4..29905a0 100644
>> --- a/src/compiler/nir/nir_opt_constant_folding.c
>> +++ b/src/compiler/nir/nir_opt_constant_folding.c
>> @@ -46,10 +46,23 @@ constant_fold_alu_instr(nir_alu_instr *instr, void
>> *mem_ctx)
>>     if (!instr->dest.dest.is_ssa)
>>        return false;
>>
>> +   unsigned bit_size = 0;
>> +   if (!(nir_op_infos[instr->op].output_type & NIR_ALU_TYPE_SIZE_MASK))
>> +      bit_size = instr->dest.dest.ssa.bit_size;
>> +   else
>> +      bit_size = nir_op_infos[instr->op].output_type &
>> NIR_ALU_TYPE_SIZE_MASK;
>
>
> This isn't right.  We need to look at all the unsized types and try to pull
> it from one of those.  We shouldn't fall back to grabbing from the sized
> type.
>
>>
>> +
>>     for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
>>        if (!instr->src[i].src.is_ssa)
>>           return false;
>>
>> +      if (bit_size == 0) {
>> +         if (!(nir_op_infos[instr->op].input_sizes[i] &
>> NIR_ALU_TYPE_SIZE_MASK))
>> +            bit_size = instr->src[i].src.ssa->bit_size;
>> +         else
>> +            bit_size = nir_op_infos[instr->op].input_sizes[i] &
>> NIR_ALU_TYPE_SIZE_MASK;
>
>
> Same here.  If they don't have any unsized sources or destinations to grab
> from, we should let bit_size be zero.
>
> Also, if we have multiple sources with the same unsized type, we should
> assert that the sizes match.
>
>>
>> +      }
>> +
>>        nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
>>
>>        if (src_instr->type != nir_instr_type_load_const)
>> @@ -58,24 +71,31 @@ constant_fold_alu_instr(nir_alu_instr *instr, void
>> *mem_ctx)
>>
>>        for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr,
>> i);
>>             j++) {
>> -         src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
>> +         if (load_const->def.bit_size == 64)
>> +            src[i].ul[j] =
>> load_const->value.ul[instr->src[i].swizzle[j]];
>> +         else
>> +            src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
>>        }
>>
>>        /* We shouldn't have any source modifiers in the optimization loop.
>> */
>>        assert(!instr->src[i].abs && !instr->src[i].negate);
>>     }
>>
>> +   if (bit_size == 0)
>> +      bit_size = 32;
>
>
> Then it'll get set here.

Yeah, idk what I was thinking here. I think the validator already
asserts that the unsized sources/dests have equal bitsizes, but we
could assert here too.

>
>>
>> +
>>     /* We shouldn't have any saturate modifiers in the optimization loop.
>> */
>>     assert(!instr->dest.saturate);
>>
>>     nir_const_value dest =
>>        nir_eval_const_opcode(instr->op,
>> instr->dest.dest.ssa.num_components,
>> -                            src);
>> +                            bit_size, src);
>>
>>     nir_load_const_instr *new_instr =
>>        nir_load_const_instr_create(mem_ctx,
>>                                    instr->dest.dest.ssa.num_components);
>>
>> +   new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
>>     new_instr->value = dest;
>>
>>     nir_instr_insert_before(&instr->instr, &new_instr->instr);
>> --
>> 2.7.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>