[Mesa-dev] [PATCH 06/14] nir: handle different bit sizes when constant folding
Iago Toral
itoral at igalia.com
Tue Mar 15 09:53:41 UTC 2016
On Mon, 2016-03-14 at 16:48 -0700, Jason Ekstrand wrote:
>
>
> On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez
> <siglesias at igalia.com> wrote:
> From: Connor Abbott <connor.w.abbott at intel.com>
>
> v2: Use the bit-size information from the opcode information
> if defined (Iago)
>
> Signed-off-by: Iago Toral Quiroga <itoral at igalia.com>
>
> FIXME: This should be squashed into the previous commit so we
> don't break
> the build. The break happens because the python script that
> generates the
> constant folding pass does not know how to handle the sized
> types introduced
> by the previous commit until this patch, so it ends up
> generating code with
> invalid types. Keep it separated for review purposes.
> ---
> src/compiler/nir/nir_constant_expressions.h | 2 +-
> src/compiler/nir/nir_constant_expressions.py | 246
> +++++++++++++++++----------
> src/compiler/nir/nir_opt_constant_folding.c | 24 ++-
> 3 files changed, 182 insertions(+), 90 deletions(-)
>
> diff --git a/src/compiler/nir/nir_constant_expressions.h
> b/src/compiler/nir/nir_constant_expressions.h
> index 97997f2..201f278 100644
> --- a/src/compiler/nir/nir_constant_expressions.h
> +++ b/src/compiler/nir/nir_constant_expressions.h
> @@ -28,4 +28,4 @@
> #include "nir.h"
>
> nir_const_value nir_eval_const_opcode(nir_op op, unsigned
> num_components,
> - nir_const_value *src);
> + unsigned bit_size,
> nir_const_value *src);
> diff --git a/src/compiler/nir/nir_constant_expressions.py
> b/src/compiler/nir/nir_constant_expressions.py
> index 32784f6..972d281 100644
> --- a/src/compiler/nir/nir_constant_expressions.py
> +++ b/src/compiler/nir/nir_constant_expressions.py
> @@ -1,4 +1,43 @@
> #! /usr/bin/python2
> +
> +def type_has_size(type_):
> + return type_[-1:].isdigit()
> +
> +def type_sizes(type_):
> + if type_.endswith("8"):
> + return [8]
> + elif type_.endswith("16"):
> + return [16]
> + elif type_.endswith("32"):
> + return [32]
> + elif type_.endswith("64"):
> + return [64]
> + else:
> + return [32, 64]
> +
> +def type_add_size(type_, size):
> + if type_has_size(type_):
> + return type_
> + return type_ + str(size)
> +
> +def get_const_field(type_):
> + if type_ == "int32":
> + return "i"
> + if type_ == "uint32":
> + return "u"
> + if type_ == "int64":
> + return "l"
> + if type_ == "uint64":
> + return "ul"
> + if type_ == "bool32":
> + return "b"
> + if type_ == "float32":
> + return "f"
> + if type_ == "float64":
> + return "d"
> + raise Exception(str(type_))
> + assert(0)
> +
> template = """\
> /*
> * Copyright (C) 2014 Intel Corporation
> @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u)
> }
>
> /* Some typed vector structures to make things like src0.y
> work */
> -% for type in ["float", "int", "uint", "bool"]:
> -struct ${type}_vec {
> - ${type} x;
> - ${type} y;
> - ${type} z;
> - ${type} w;
> +typedef float float32_t;
> +typedef double float64_t;
> +typedef bool bool32_t;
> +% for type in ["float", "int", "uint"]:
> +% for width in [32, 64]:
> +struct ${type}${width}_vec {
> + ${type}${width}_t x;
> + ${type}${width}_t y;
> + ${type}${width}_t z;
> + ${type}${width}_t w;
> };
> % endfor
> +% endfor
> +
> +struct bool32_vec {
> + bool x;
> + bool y;
> + bool z;
> + bool w;
> +};
>
> % for name, op in sorted(opcodes.iteritems()):
> static nir_const_value
> -evaluate_${name}(unsigned num_components, nir_const_value
> *_src)
> +evaluate_${name}(unsigned num_components, unsigned bit_size,
> + nir_const_value *_src)
> {
> nir_const_value _dst_val = { { {0, 0, 0, 0} } };
>
> - ## For each non-per-component input, create a variable
> srcN that
> - ## contains x, y, z, and w elements which are filled in
> with the
> - ## appropriately-typed values.
> - % for j in range(op.num_inputs):
> - % if op.input_sizes[j] == 0:
> - <% continue %>
> - % elif "src" + str(j) not in op.const_expr:
> - ## Avoid unused variable warnings
> - <% continue %>
> - %endif
> -
> - struct ${op.input_types[j]}_vec src${j} = {
> - % for k in range(op.input_sizes[j]):
> - % if op.input_types[j] == "bool":
> - _src[${j}].u[${k}] != 0,
> - % else:
> - _src[${j}].${op.input_types[j][:1]}[${k}],
> - % endif
> - % endfor
> - };
> - % endfor
> + switch (bit_size) {
> + % for bit_size in [32, 64]:
> + case ${bit_size}: {
> + <%
> + output_type = type_add_size(op.output_type, bit_size)
> + input_types = [type_add_size(type_, bit_size) for type_
> in op.input_types]
> + %>
> +
> + ## For each non-per-component input, create a variable
> srcN that
> + ## contains x, y, z, and w elements which are filled in
> with the
> + ## appropriately-typed values.
> + % for j in range(op.num_inputs):
> + % if op.input_sizes[j] == 0:
> + <% continue %>
> + % elif "src" + str(j) not in op.const_expr:
> + ## Avoid unused variable warnings
> + <% continue %>
> + %endif
>
> - % if op.output_size == 0:
> - ## For per-component instructions, we need to iterate
> over the
> - ## components and apply the constant expression one
> component
> - ## at a time.
> - for (unsigned _i = 0; _i < num_components; _i++) {
> - ## For each per-component input, create a variable
> srcN that
> - ## contains the value of the current (_i'th)
> component.
> - % for j in range(op.num_inputs):
> - % if op.input_sizes[j] != 0:
> - <% continue %>
> - % elif "src" + str(j) not in op.const_expr:
> - ## Avoid unused variable warnings
> - <% continue %>
> - % elif op.input_types[j] == "bool":
> - bool src${j} = _src[${j}].u[_i] != 0;
> + struct ${input_types[j]}_vec src${j} = {
> + % for k in range(op.input_sizes[j]):
> + % if input_types[j] == "bool32":
> + _src[${j}].u[${k}] != 0,
> % else:
> - ${op.input_types[j]} src${j} =
> _src[${j}].${op.input_types[j][:1]}[_i];
> +
> _src[${j}].${get_const_field(input_types[j])}[${k}],
> % endif
> % endfor
> + };
> + % endfor
> +
> + % if op.output_size == 0:
> + ## For per-component instructions, we need to
> iterate over the
> + ## components and apply the constant expression one
> component
> + ## at a time.
> + for (unsigned _i = 0; _i < num_components; _i++) {
> + ## For each per-component input, create a
> variable srcN that
> + ## contains the value of the current (_i'th)
> component.
> + % for j in range(op.num_inputs):
> + % if op.input_sizes[j] != 0:
> + <% continue %>
> + % elif "src" + str(j) not in op.const_expr:
> + ## Avoid unused variable warnings
> + <% continue %>
> + % elif input_types[j] == "bool32":
> + bool src${j} = _src[${j}].u[_i] != 0;
> + % else:
> + ${input_types[j]}_t src${j} =
> +
> _src[${j}].${get_const_field(input_types[j])}[_i];
> + % endif
> + % endfor
> +
> + ## Create an appropriately-typed variable dst and
> assign the
> + ## result of the const_expr to it. If const_expr
> already contains
> + ## writes to dst, just include const_expr
> directly.
> + % if "dst" in op.const_expr:
> + ${output_type}_t dst;
> + ${op.const_expr}
> + % else:
> + ${output_type}_t dst = ${op.const_expr};
> + % endif
> +
> + ## Store the current component of the actual
> destination to the
> + ## value of dst.
> + % if output_type == "bool32":
> + ## Sanitize the C value to a proper NIR bool
> + _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
> + % else:
> + _dst_val.${get_const_field(output_type)}[_i] =
> dst;
> + % endif
> + }
> + % else:
> + ## In the non-per-component case, create a struct
> dst with
> + ## appropriately-typed elements x, y, z, and w and
> assign the result
> + ## of the const_expr to all components of dst, or
> include the
> + ## const_expr directly if it writes to dst already.
> + struct ${output_type}_vec dst;
>
> - ## Create an appropriately-typed variable dst and
> assign the
> - ## result of the const_expr to it. If const_expr
> already contains
> - ## writes to dst, just include const_expr directly.
> % if "dst" in op.const_expr:
> - ${op.output_type} dst;
> ${op.const_expr}
> % else:
> - ${op.output_type} dst = ${op.const_expr};
> + ## Splat the value to all components. This way
> expressions which
> + ## write the same value to all components don't
> need to explicitly
> + ## write to dest. One such example is fnoise
> which has a
> + ## const_expr of 0.0f.
> + dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
> % endif
>
> - ## Store the current component of the actual
> destination to the
> - ## value of dst.
> - % if op.output_type == "bool":
> - ## Sanitize the C value to a proper NIR bool
> - _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
> - % else:
> - _dst_val.${op.output_type[:1]}[_i] = dst;
> - % endif
> - }
> - % else:
> - ## In the non-per-component case, create a struct dst
> with
> - ## appropriately-typed elements x, y, z, and w and
> assign the result
> - ## of the const_expr to all components of dst, or
> include the
> - ## const_expr directly if it writes to dst already.
> - struct ${op.output_type}_vec dst;
> -
> - % if "dst" in op.const_expr:
> - ${op.const_expr}
> - % else:
> - ## Splat the value to all components. This way
> expressions which
> - ## write the same value to all components don't need
> to explicitly
> - ## write to dest. One such example is fnoise which
> has a
> - ## const_expr of 0.0f.
> - dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
> + ## For each component in the destination, copy the
> value of dst to
> + ## the actual destination.
> + % for k in range(op.output_size):
> + % if output_type == "bool32":
> + ## Sanitize the C value to a proper NIR bool
> + _dst_val.u[${k}] = dst.${"xyzw"[k]} ?
> NIR_TRUE : NIR_FALSE;
> + % else:
> + _dst_val.${get_const_field(output_type)}[${k}]
> = dst.${"xyzw"[k]};
> + % endif
> + % endfor
> % endif
>
> - ## For each component in the destination, copy the
> value of dst to
> - ## the actual destination.
> - % for k in range(op.output_size):
> - % if op.output_type == "bool":
> - ## Sanitize the C value to a proper NIR bool
> - _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE :
> NIR_FALSE;
> - % else:
> - _dst_val.${op.output_type[:1]}[${k}] =
> dst.${"xyzw"[k]};
> - % endif
> - % endfor
> - % endif
> + break;
> + }
> + % endfor
> +
> + default:
> + unreachable("unknown bit width");
> + }
>
> return _dst_val;
> }
> @@ -316,12 +385,12 @@ evaluate_${name}(unsigned
> num_components, nir_const_value *_src)
>
> nir_const_value
> nir_eval_const_opcode(nir_op op, unsigned num_components,
> - nir_const_value *src)
> + unsigned bit_width, nir_const_value
> *src)
> {
> switch (op) {
> % for name in sorted(opcodes.iterkeys()):
> case nir_op_${name}: {
> - return evaluate_${name}(num_components, src);
> + return evaluate_${name}(num_components, bit_width,
> src);
> break;
> }
> % endfor
> @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned
> num_components,
> from nir_opcodes import opcodes
> from mako.template import Template
>
> -print Template(template).render(opcodes=opcodes)
> +print Template(template).render(opcodes=opcodes,
> type_sizes=type_sizes,
> + type_has_size=type_has_size,
> + type_add_size=type_add_size,
> +
> get_const_field=get_const_field)
> diff --git a/src/compiler/nir/nir_opt_constant_folding.c
> b/src/compiler/nir/nir_opt_constant_folding.c
> index 04876a4..29905a0 100644
> --- a/src/compiler/nir/nir_opt_constant_folding.c
> +++ b/src/compiler/nir/nir_opt_constant_folding.c
> @@ -46,10 +46,23 @@ constant_fold_alu_instr(nir_alu_instr
> *instr, void *mem_ctx)
> if (!instr->dest.dest.is_ssa)
> return false;
>
> + unsigned bit_size = 0;
> + if (!(nir_op_infos[instr->op].output_type &
> NIR_ALU_TYPE_SIZE_MASK))
> + bit_size = instr->dest.dest.ssa.bit_size;
> + else
> + bit_size = nir_op_infos[instr->op].output_type &
> NIR_ALU_TYPE_SIZE_MASK;
>
>
> This isn't right. We need to look at all the unsized types and try to
> pull it from one of those. We shouldn't fall back to grabbing from
> the sized type.
Ok, so you don't like that in the case that the alu operation has a
sized destination we grab the bit-size from the opcode definition? I am
not sure I see the problem with that... isn't the opcode mandating a
specific bit-size in that case? How can the bit-size we want be
different from that?
>
> +
> for (unsigned i = 0; i <
> nir_op_infos[instr->op].num_inputs; i++) {
> if (!instr->src[i].src.is_ssa)
> return false;
>
> + if (bit_size == 0) {
> + if (!(nir_op_infos[instr->op].input_sizes[i] &
> NIR_ALU_TYPE_SIZE_MASK))
> + bit_size = instr->src[i].src.ssa->bit_size;
> + else
> + bit_size = nir_op_infos[instr->op].input_sizes[i]
> & NIR_ALU_TYPE_SIZE_MASK;
>
>
> Same here. If they don't have any unsized sources or destinations to
> grab from, we should let bit_size be zero.
But if we have an opcode with all sized 64-bit types then...
> Also, if we have multiple sources with the same unsized type, we
> should assert that the sizes match.
>
>
> + }
> +
> nir_instr *src_instr =
> instr->src[i].src.ssa->parent_instr;
>
> if (src_instr->type != nir_instr_type_load_const)
> @@ -58,24 +71,31 @@ constant_fold_alu_instr(nir_alu_instr
> *instr, void *mem_ctx)
>
> for (unsigned j = 0; j <
> nir_ssa_alu_instr_src_components(instr, i);
> j++) {
> - src[i].u[j] =
> load_const->value.u[instr->src[i].swizzle[j]];
> + if (load_const->def.bit_size == 64)
> + src[i].ul[j] =
> load_const->value.ul[instr->src[i].swizzle[j]];
> + else
> + src[i].u[j] =
> load_const->value.u[instr->src[i].swizzle[j]];
> }
>
> /* We shouldn't have any source modifiers in the
> optimization loop. */
> assert(!instr->src[i].abs && !instr->src[i].negate);
> }
>
> + if (bit_size == 0)
> + bit_size = 32;
... this default to 32 here would not be correct any more. If at this
point the bit-size is 0 (meaning that all inputs and output are sized)
then we should take the bit-size from the opcode's output type, which is
known to be sized, right?
>
> Then it'll get set here.
>
>
> +
> /* We shouldn't have any saturate modifiers in the
> optimization loop. */
> assert(!instr->dest.saturate);
>
> nir_const_value dest =
> nir_eval_const_opcode(instr->op,
> instr->dest.dest.ssa.num_components,
> - src);
> + bit_size, src);
>
> nir_load_const_instr *new_instr =
> nir_load_const_instr_create(mem_ctx,
>
> instr->dest.dest.ssa.num_components);
>
> + new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
> new_instr->value = dest;
>
> nir_instr_insert_before(&instr->instr, &new_instr->instr);
> --
> 2.7.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list