[Mesa-dev] [PATCH 06/14] nir: handle different bit sizes when constant folding

Jason Ekstrand jason at jlekstrand.net
Mon Mar 14 23:48:33 UTC 2016


On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez <
siglesias at igalia.com> wrote:

> From: Connor Abbott <connor.w.abbott at intel.com>
>
> v2: Use the bit-size information from the opcode information if defined
> (Iago)
>
> Signed-off-by: Iago Toral Quiroga <itoral at igalia.com>
>
> FIXME: This should be squashed into the previous commit so we don't break
> the build. The break happens because the python script that generates the
> constant folding pass does not know how to handle the sized types
> introduced
> by the previous commit until this patch, so it ends up generating code with
> invalid types. Keep it separated for review purposes.
> ---
>  src/compiler/nir/nir_constant_expressions.h  |   2 +-
>  src/compiler/nir/nir_constant_expressions.py | 246
> +++++++++++++++++----------
>  src/compiler/nir/nir_opt_constant_folding.c  |  24 ++-
>  3 files changed, 182 insertions(+), 90 deletions(-)
>
> diff --git a/src/compiler/nir/nir_constant_expressions.h
> b/src/compiler/nir/nir_constant_expressions.h
> index 97997f2..201f278 100644
> --- a/src/compiler/nir/nir_constant_expressions.h
> +++ b/src/compiler/nir/nir_constant_expressions.h
> @@ -28,4 +28,4 @@
>  #include "nir.h"
>
>  nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
> -                                      nir_const_value *src);
> +                                      unsigned bit_size, nir_const_value
> *src);
> diff --git a/src/compiler/nir/nir_constant_expressions.py
> b/src/compiler/nir/nir_constant_expressions.py
> index 32784f6..972d281 100644
> --- a/src/compiler/nir/nir_constant_expressions.py
> +++ b/src/compiler/nir/nir_constant_expressions.py
> @@ -1,4 +1,43 @@
>  #! /usr/bin/python2
> +
> +def type_has_size(type_):
> +    return type_[-1:].isdigit()
> +
> +def type_sizes(type_):
> +    if type_.endswith("8"):
> +        return [8]
> +    elif type_.endswith("16"):
> +        return [16]
> +    elif type_.endswith("32"):
> +        return [32]
> +    elif type_.endswith("64"):
> +        return [64]
> +    else:
> +        return [32, 64]
> +
> +def type_add_size(type_, size):
> +    if type_has_size(type_):
> +        return type_
> +    return type_ + str(size)
> +
> +def get_const_field(type_):
> +    if type_ == "int32":
> +        return "i"
> +    if type_ == "uint32":
> +        return "u"
> +    if type_ == "int64":
> +        return "l"
> +    if type_ == "uint64":
> +        return "ul"
> +    if type_ == "bool32":
> +        return "b"
> +    if type_ == "float32":
> +        return "f"
> +    if type_ == "float64":
> +        return "d"
> +    raise Exception(str(type_))
> +    assert(0)
> +
>  template = """\
>  /*
>   * Copyright (C) 2014 Intel Corporation
> @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u)
>  }
>
>  /* Some typed vector structures to make things like src0.y work */
> -% for type in ["float", "int", "uint", "bool"]:
> -struct ${type}_vec {
> -   ${type} x;
> -   ${type} y;
> -   ${type} z;
> -   ${type} w;
> +typedef float float32_t;
> +typedef double float64_t;
> +typedef bool bool32_t;
> +% for type in ["float", "int", "uint"]:
> +% for width in [32, 64]:
> +struct ${type}${width}_vec {
> +   ${type}${width}_t x;
> +   ${type}${width}_t y;
> +   ${type}${width}_t z;
> +   ${type}${width}_t w;
>  };
>  % endfor
> +% endfor
> +
> +struct bool32_vec {
> +    bool x;
> +    bool y;
> +    bool z;
> +    bool w;
> +};
>
>  % for name, op in sorted(opcodes.iteritems()):
>  static nir_const_value
> -evaluate_${name}(unsigned num_components, nir_const_value *_src)
> +evaluate_${name}(unsigned num_components, unsigned bit_size,
> +                 nir_const_value *_src)
>  {
>     nir_const_value _dst_val = { { {0, 0, 0, 0} } };
>
> -   ## For each non-per-component input, create a variable srcN that
> -   ## contains x, y, z, and w elements which are filled in with the
> -   ## appropriately-typed values.
> -   % for j in range(op.num_inputs):
> -      % if op.input_sizes[j] == 0:
> -         <% continue %>
> -      % elif "src" + str(j) not in op.const_expr:
> -         ## Avoid unused variable warnings
> -         <% continue %>
> -      %endif
> -
> -      struct ${op.input_types[j]}_vec src${j} = {
> -      % for k in range(op.input_sizes[j]):
> -         % if op.input_types[j] == "bool":
> -            _src[${j}].u[${k}] != 0,
> -         % else:
> -            _src[${j}].${op.input_types[j][:1]}[${k}],
> -         % endif
> -      % endfor
> -      };
> -   % endfor
> +   switch (bit_size) {
> +   % for bit_size in [32, 64]:
> +   case ${bit_size}: {
> +      <%
> +      output_type = type_add_size(op.output_type, bit_size)
> +      input_types = [type_add_size(type_, bit_size) for type_ in
> op.input_types]
> +      %>
> +
> +      ## For each non-per-component input, create a variable srcN that
> +      ## contains x, y, z, and w elements which are filled in with the
> +      ## appropriately-typed values.
> +      % for j in range(op.num_inputs):
> +         % if op.input_sizes[j] == 0:
> +            <% continue %>
> +         % elif "src" + str(j) not in op.const_expr:
> +            ## Avoid unused variable warnings
> +            <% continue %>
> +         %endif
>
> -   % if op.output_size == 0:
> -      ## For per-component instructions, we need to iterate over the
> -      ## components and apply the constant expression one component
> -      ## at a time.
> -      for (unsigned _i = 0; _i < num_components; _i++) {
> -         ## For each per-component input, create a variable srcN that
> -         ## contains the value of the current (_i'th) component.
> -         % for j in range(op.num_inputs):
> -            % if op.input_sizes[j] != 0:
> -               <% continue %>
> -            % elif "src" + str(j) not in op.const_expr:
> -               ## Avoid unused variable warnings
> -               <% continue %>
> -            % elif op.input_types[j] == "bool":
> -               bool src${j} = _src[${j}].u[_i] != 0;
> +         struct ${input_types[j]}_vec src${j} = {
> +         % for k in range(op.input_sizes[j]):
> +            % if input_types[j] == "bool32":
> +               _src[${j}].u[${k}] != 0,
>              % else:
> -               ${op.input_types[j]} src${j} =
> _src[${j}].${op.input_types[j][:1]}[_i];
> +               _src[${j}].${get_const_field(input_types[j])}[${k}],
>              % endif
>           % endfor
> +         };
> +      % endfor
> +
> +      % if op.output_size == 0:
> +         ## For per-component instructions, we need to iterate over the
> +         ## components and apply the constant expression one component
> +         ## at a time.
> +         for (unsigned _i = 0; _i < num_components; _i++) {
> +            ## For each per-component input, create a variable srcN that
> +            ## contains the value of the current (_i'th) component.
> +            % for j in range(op.num_inputs):
> +               % if op.input_sizes[j] != 0:
> +                  <% continue %>
> +               % elif "src" + str(j) not in op.const_expr:
> +                  ## Avoid unused variable warnings
> +                  <% continue %>
> +               % elif input_types[j] == "bool32":
> +                  bool src${j} = _src[${j}].u[_i] != 0;
> +               % else:
> +                  ${input_types[j]}_t src${j} =
> +                     _src[${j}].${get_const_field(input_types[j])}[_i];
> +               % endif
> +            % endfor
> +
> +            ## Create an appropriately-typed variable dst and assign the
> +            ## result of the const_expr to it.  If const_expr already
> contains
> +            ## writes to dst, just include const_expr directly.
> +            % if "dst" in op.const_expr:
> +               ${output_type}_t dst;
> +               ${op.const_expr}
> +            % else:
> +               ${output_type}_t dst = ${op.const_expr};
> +            % endif
> +
> +            ## Store the current component of the actual destination to
> the
> +            ## value of dst.
> +            % if output_type == "bool32":
> +               ## Sanitize the C value to a proper NIR bool
> +               _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
> +            % else:
> +               _dst_val.${get_const_field(output_type)}[_i] = dst;
> +            % endif
> +         }
> +      % else:
> +         ## In the non-per-component case, create a struct dst with
> +         ## appropriately-typed elements x, y, z, and w and assign the
> result
> +         ## of the const_expr to all components of dst, or include the
> +         ## const_expr directly if it writes to dst already.
> +         struct ${output_type}_vec dst;
>
> -         ## Create an appropriately-typed variable dst and assign the
> -         ## result of the const_expr to it.  If const_expr already
> contains
> -         ## writes to dst, just include const_expr directly.
>           % if "dst" in op.const_expr:
> -            ${op.output_type} dst;
>              ${op.const_expr}
>           % else:
> -            ${op.output_type} dst = ${op.const_expr};
> +            ## Splat the value to all components.  This way expressions
> which
> +            ## write the same value to all components don't need to
> explicitly
> +            ## write to dest.  One such example is fnoise which has a
> +            ## const_expr of 0.0f.
> +            dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
>           % endif
>
> -         ## Store the current component of the actual destination to the
> -         ## value of dst.
> -         % if op.output_type == "bool":
> -            ## Sanitize the C value to a proper NIR bool
> -            _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
> -         % else:
> -            _dst_val.${op.output_type[:1]}[_i] = dst;
> -         % endif
> -      }
> -   % else:
> -      ## In the non-per-component case, create a struct dst with
> -      ## appropriately-typed elements x, y, z, and w and assign the result
> -      ## of the const_expr to all components of dst, or include the
> -      ## const_expr directly if it writes to dst already.
> -      struct ${op.output_type}_vec dst;
> -
> -      % if "dst" in op.const_expr:
> -         ${op.const_expr}
> -      % else:
> -         ## Splat the value to all components.  This way expressions which
> -         ## write the same value to all components don't need to
> explicitly
> -         ## write to dest.  One such example is fnoise which has a
> -         ## const_expr of 0.0f.
> -         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
> +         ## For each component in the destination, copy the value of dst
> to
> +         ## the actual destination.
> +         % for k in range(op.output_size):
> +            % if output_type == "bool32":
> +               ## Sanitize the C value to a proper NIR bool
> +               _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
> +            % else:
> +               _dst_val.${get_const_field(output_type)}[${k}] =
> dst.${"xyzw"[k]};
> +            % endif
> +         % endfor
>        % endif
>
> -      ## For each component in the destination, copy the value of dst to
> -      ## the actual destination.
> -      % for k in range(op.output_size):
> -         % if op.output_type == "bool":
> -            ## Sanitize the C value to a proper NIR bool
> -            _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
> -         % else:
> -            _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
> -         % endif
> -      % endfor
> -   % endif
> +      break;
> +   }
> +   % endfor
> +
> +   default:
> +      unreachable("unknown bit width");
> +   }
>
>     return _dst_val;
>  }
> @@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components,
> nir_const_value *_src)
>
>  nir_const_value
>  nir_eval_const_opcode(nir_op op, unsigned num_components,
> -                      nir_const_value *src)
> +                      unsigned bit_width, nir_const_value *src)
>  {
>     switch (op) {
>  % for name in sorted(opcodes.iterkeys()):
>     case nir_op_${name}: {
> -      return evaluate_${name}(num_components, src);
> +      return evaluate_${name}(num_components, bit_width, src);
>        break;
>     }
>  % endfor
> @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned
> num_components,
>  from nir_opcodes import opcodes
>  from mako.template import Template
>
> -print Template(template).render(opcodes=opcodes)
> +print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
> +                                type_has_size=type_has_size,
> +                                type_add_size=type_add_size,
> +                                get_const_field=get_const_field)
> diff --git a/src/compiler/nir/nir_opt_constant_folding.c
> b/src/compiler/nir/nir_opt_constant_folding.c
> index 04876a4..29905a0 100644
> --- a/src/compiler/nir/nir_opt_constant_folding.c
> +++ b/src/compiler/nir/nir_opt_constant_folding.c
> @@ -46,10 +46,23 @@ constant_fold_alu_instr(nir_alu_instr *instr, void
> *mem_ctx)
>     if (!instr->dest.dest.is_ssa)
>        return false;
>
> +   unsigned bit_size = 0;
> +   if (!(nir_op_infos[instr->op].output_type & NIR_ALU_TYPE_SIZE_MASK))
> +      bit_size = instr->dest.dest.ssa.bit_size;
> +   else
> +      bit_size = nir_op_infos[instr->op].output_type &
> NIR_ALU_TYPE_SIZE_MASK;
>

This isn't right.  We need to look at all the unsized types and try to pull
it from one of those.  We shouldn't fall back to grabbing from the sized
type.


> +
>     for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
>        if (!instr->src[i].src.is_ssa)
>           return false;
>
> +      if (bit_size == 0) {
> +         if (!(nir_op_infos[instr->op].input_sizes[i] &
> NIR_ALU_TYPE_SIZE_MASK))
> +            bit_size = instr->src[i].src.ssa->bit_size;
> +         else
> +            bit_size = nir_op_infos[instr->op].input_sizes[i] &
> NIR_ALU_TYPE_SIZE_MASK;
>

Same here.  If they don't have any unsized sources or destinations to grab
from, we should let bit_size be zero.

Also, if we have multiple sources with the same unsized type, we should
assert that the sizes match.


> +      }
> +
>        nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
>
>        if (src_instr->type != nir_instr_type_load_const)
> @@ -58,24 +71,31 @@ constant_fold_alu_instr(nir_alu_instr *instr, void
> *mem_ctx)
>
>        for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
>             j++) {
> -         src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
> +         if (load_const->def.bit_size == 64)
> +            src[i].ul[j] = load_const->value.ul[instr->src[i].swizzle[j]];
> +         else
> +            src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
>        }
>
>        /* We shouldn't have any source modifiers in the optimization loop.
> */
>        assert(!instr->src[i].abs && !instr->src[i].negate);
>     }
>
> +   if (bit_size == 0)
> +      bit_size = 32;
>

Then it'll get set here.


> +
>     /* We shouldn't have any saturate modifiers in the optimization loop.
> */
>     assert(!instr->dest.saturate);
>
>     nir_const_value dest =
>        nir_eval_const_opcode(instr->op,
> instr->dest.dest.ssa.num_components,
> -                            src);
> +                            bit_size, src);
>
>     nir_load_const_instr *new_instr =
>        nir_load_const_instr_create(mem_ctx,
>                                    instr->dest.dest.ssa.num_components);
>
> +   new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
>     new_instr->value = dest;
>
>     nir_instr_insert_before(&instr->instr, &new_instr->instr);
> --
> 2.7.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160314/3da10355/attachment-0001.html>


More information about the mesa-dev mailing list