[Mesa-dev] [PATCH v2 04/29] nir: add support for flushing to zero denorm constants

Wed Jan 30 15:18:51 UTC 2019

On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez <
siglesias at igalia.com> wrote:

> v2:
> - Refactor conditions and shared function (Connor)
> - Move code to nir_eval_const_opcode() (Connor)
> - Don't flush to zero on fquantize2f16
>   From Vulkan spec, VK_KHR_shader_float_controls section:
>
>   "3) Do denorm and rounding mode controls apply to OpSpecConstantOp?
>
>   RESOLVED: Yes, except when the opcode is OpQuantizeToF16."
>
> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
> ---
>  src/compiler/nir/nir_constant_expressions.h  |  3 +-
>  src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++--
>  src/compiler/nir/nir_loop_analyze.c          |  7 ++-
>  src/compiler/nir/nir_opt_constant_folding.c  | 15 ++---
>  src/compiler/spirv/spirv_to_nir.c            |  3 +-
>  5 files changed, 75 insertions(+), 18 deletions(-)
>
> diff --git a/src/compiler/nir/nir_constant_expressions.h
> b/src/compiler/nir/nir_constant_expressions.h
> index 1d6bbbc25d3..a2d416abc45 100644
> --- a/src/compiler/nir/nir_constant_expressions.h
> +++ b/src/compiler/nir/nir_constant_expressions.h
> @@ -31,6 +31,7 @@
>  #include "nir.h"
>
>  nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
> -                                      unsigned bit_size, nir_const_value
> *src);
> +                                      unsigned bit_size, nir_const_value
> *src,
> +                                      unsigned
> float_controls_execution_mode);
>
>  #endif /* NIR_CONSTANT_EXPRESSIONS_H */
> diff --git a/src/compiler/nir/nir_constant_expressions.py
> b/src/compiler/nir/nir_constant_expressions.py
> index 505cdd8baae..dc2132df0d0 100644
> --- a/src/compiler/nir/nir_constant_expressions.py
> +++ b/src/compiler/nir/nir_constant_expressions.py
> @@ -66,6 +66,37 @@ template = """\
>  #include "util/bigmath.h"
>  #include "nir_constant_expressions.h"
>
> +/**
> + * Checks if the provided value is a denorm and flushes it to zero.
> +*/
> +static nir_const_value
> +constant_denorm_flush_to_zero(nir_const_value value, unsigned index,
> unsigned bit_size)
> +{
> +   switch(bit_size) {
> +   case 64:
> +      if (value.u64[index] < 0x0010000000000000)
> +         value.u64[index] = 0;
> +      if (value.u64[index] & 0x8000000000000000 &&
> +          !(value.u64[index] & 0x7ff0000000000000))
> +         value.u64[index] = 0x8000000000000000;
> +      break;
> +   case 32:
> +      if (value.u32[index] < 0x00800000)
> +         value.u32[index] = 0;
> +      if (value.u32[index] & 0x80000000 &&
> +          !(value.u32[index] & 0x7f800000))
> +         value.u32[index] = 0x80000000;
> +      break;
> +   case 16:
> +      if (value.u16[index] < 0x0400)
> +         value.u16[index] = 0;
> +      if (value.u16[index] & 0x8000 &&
> +          !(value.u16[index] & 0x7c00))
> +         value.u16[index] = 0x8000;
> +   }
> +   return value;
> +}
> +
>  /**
>   * Evaluate one component of packSnorm4x8.
>   */
> @@ -260,7 +291,7 @@ struct ${type}${width}_vec {
>  % endfor
>  % endfor
>
> -<%def name="evaluate_op(op, bit_size)">
> +<%def name="evaluate_op(op, bit_size, execution_mode)">
>     <%
>     output_type = type_add_size(op.output_type, bit_size)
>     input_types = [type_add_size(type_, bit_size) for type_ in
> op.input_types]
> @@ -277,6 +308,14 @@ struct ${type}${width}_vec {
>           <% continue %>
>        %endif
>
> +      % for k in range(op.input_sizes[j]):
> +         % if op.name != "fquantize2f16" and bit_size > 8 and
> op.input_types[j] == "float":
>

This condition doesn't seem right. It may happen to work, but it isn't
following NIR principles on ALU instructions. Each NIR opcode has an output
type (given by op.output_type) and input types for each source (given by
op.input_types). Each type may be sized, like "float32", or unsized like
"float". All unsized inputs/outputs must have the same bit size at runtime.
The bit_size here is the common bitsize for inputs/outputs with unsized
types like "float" in the opcode definition. Even though in general it's
only known at runtime, we're switching over all bitsizes in the generated
code, so here we do know what it is at compile time. In order to handle
sized types, we have to drop all references to bit_size and stop comparing
op.input_types[j] directly with "float" since it may be a sized type. Also,
if this source has a float type, we already know that its bit size is at
least 16, so the second check should be useless. I think what you actually
want to do is extract the base type, i.e. something like: op.name !=
"fquantize16" and type_base_name(input_types[j]) == "float"

> +            if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
>

bit_size here isn't the right bit size for sized sources. You want
${type_size(input_types[i])} which is the actual size of the source at
runtime. This will be bit_size if the op.input_types[j] is unsized or the
input size otherwise.

+               _src[${j}] = constant_denorm_flush_to_zero(_src[${j}],
> ${k}, bit_size);
>

same as above with bit_size.

Finally, I think you missed the equivalent hunk for non-per-component
sources.

> +            }
> +         % endif
> +      % endfor
> +
>        const struct ${input_types[j]}_vec src${j} = {
>        % for k in range(op.input_sizes[j]):
>           % if input_types[j] == "int1":
> @@ -343,6 +382,12 @@ struct ${type}${width}_vec {
>           % else:
>              _dst_val.${get_const_field(output_type)}[_i] = dst;
>           % endif
> +
> +         % if op.name != "fquantize2f16" and bit_size > 8 and
> op.output_type == "float":
> +            if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
> +               _dst_val = constant_denorm_flush_to_zero(_dst_val, _i,
> bit_size);
> +            }
> +         % endif
>
       }
>     % else:
>        ## In the non-per-component case, create a struct dst with
> @@ -375,6 +420,12 @@ struct ${type}${width}_vec {
>           % else:
>              _dst_val.${get_const_field(output_type)}[${k}] =
> dst.${"xyzw"[k]};
>           % endif
> +
> +         % if op.name != "fquantize2f16" and bit_size > 8 and
> op.output_type == "float":
> +            if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
> +               _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k},
> bit_size);
> +            }
> +         % endif
>        % endfor
>     % endif
>  </%def>
> @@ -383,7 +434,8 @@ struct ${type}${width}_vec {
>  static nir_const_value
>  evaluate_${name}(MAYBE_UNUSED unsigned num_components,
>                   ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned
> bit_size,
> -                 MAYBE_UNUSED nir_const_value *_src)
> +                 MAYBE_UNUSED nir_const_value *_src,
> +                 MAYBE_UNUSED unsigned execution_mode)
>  {
>     nir_const_value _dst_val = { {0, } };
>
> @@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
>        switch (bit_size) {
>        % for bit_size in op_bit_sizes(op):
>        case ${bit_size}: {
> -         ${evaluate_op(op, bit_size)}
> +         ${evaluate_op(op, bit_size, execution_mode)}
>           break;
>        }
>        % endfor
> @@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
>           unreachable("unknown bit width");
>        }
>     % else:
> -      ${evaluate_op(op, 0)}
> +      ${evaluate_op(op, 0, execution_mode)}
>     % endif
>
>     return _dst_val;
> @@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned
> num_components,
>
>  nir_const_value
>  nir_eval_const_opcode(nir_op op, unsigned num_components,
> -                      unsigned bit_width, nir_const_value *src)
> +                      unsigned bit_width, nir_const_value *src,
> +                      unsigned float_controls_execution_mode)
>  {
>     switch (op) {
>  % for name in sorted(opcodes.keys()):
>     case nir_op_${name}:
> -      return evaluate_${name}(num_components, bit_width, src);
> +      return evaluate_${name}(num_components, bit_width, src,
> float_controls_execution_mode);
>  % endfor
>     default:
>        unreachable("shouldn't get here");
> diff --git a/src/compiler/nir/nir_loop_analyze.c
> b/src/compiler/nir/nir_loop_analyze.c
> index 259f02a854e..c9fba8649db 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -497,19 +497,20 @@ test_iterations(int32_t iter_int, nir_const_value
> *step,
>      */
>     nir_const_value mul_src[2] = { iter_src, *step };
>     nir_const_value mul_result =
> -      nir_eval_const_opcode(mul_op, 1, bit_size, mul_src);
> +      nir_eval_const_opcode(mul_op, 1, bit_size, mul_src,
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>

Shouldn't we be using the actual float control mode for the shader? I don't
know much about this code, but presumably it's trying to simulate something
calculated by the shader.

>
>     /* Add the initial value to the accumulated induction variable total */
>     nir_const_value add_src[2] = { mul_result, *initial };
>     nir_const_value add_result =
> -      nir_eval_const_opcode(add_op, 1, bit_size, add_src);
> +      nir_eval_const_opcode(add_op, 1, bit_size, add_src,
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
>     nir_const_value src[2] = { { {0, } }, { {0, } } };
>     src[limit_rhs ? 0 : 1] = add_result;
>     src[limit_rhs ? 1 : 0] = *limit;
>
>     /* Evaluate the loop exit condition */
> -   nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size,
> src);
> +   nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size,
> src,
> +
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
>     return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0);
>  }
> diff --git a/src/compiler/nir/nir_opt_constant_folding.c
> b/src/compiler/nir/nir_opt_constant_folding.c
> index 5097a3bcc36..bd6130d5b33 100644
> --- a/src/compiler/nir/nir_opt_constant_folding.c
> +++ b/src/compiler/nir/nir_opt_constant_folding.c
> @@ -39,7 +39,7 @@ struct constant_fold_state {
>  };
>
>  static bool
> -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
> +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned
> execution_mode)
>  {
>     nir_const_value src[NIR_MAX_VEC_COMPONENTS];
>
> @@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr, void
> *mem_ctx)
>
>     nir_const_value dest =
>        nir_eval_const_opcode(instr->op,
> instr->dest.dest.ssa.num_components,
> -                            bit_size, src);
> +                            bit_size, src, execution_mode);
>
>     nir_load_const_instr *new_instr =
>        nir_load_const_instr_create(mem_ctx,
> @@ -161,14 +161,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr
> *instr)
>  }
>
>  static bool
> -constant_fold_block(nir_block *block, void *mem_ctx)
> +constant_fold_block(nir_block *block, void *mem_ctx, unsigned
> execution_mode)
>  {
>     bool progress = false;
>
>     nir_foreach_instr_safe(instr, block) {
>        switch (instr->type) {
>        case nir_instr_type_alu:
> -         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr),
> mem_ctx);
> +         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr),
> mem_ctx, execution_mode);
>           break;
>        case nir_instr_type_intrinsic:
>           progress |=
> @@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void *mem_ctx)
>  }
>
>  static bool
> -nir_opt_constant_folding_impl(nir_function_impl *impl)
> +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned
> execution_mode)
>  {
>     void *mem_ctx = ralloc_parent(impl);
>     bool progress = false;
>
>     nir_foreach_block(block, impl) {
> -      progress |= constant_fold_block(block, mem_ctx);
> +      progress |= constant_fold_block(block, mem_ctx, execution_mode);
>     }
>
>     if (progress)
> @@ -204,10 +204,11 @@ bool
>  nir_opt_constant_folding(nir_shader *shader)
>  {
>     bool progress = false;
> +   unsigned execution_mode =
> shader->info.shader_float_controls_execution_mode;
>
>     nir_foreach_function(function, shader) {
>        if (function->impl)
> -         progress |= nir_opt_constant_folding_impl(function->impl);
> +         progress |= nir_opt_constant_folding_impl(function->impl,
> execution_mode);
>     }
>
>     return progress;
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 96d4d80970f..7578a83e2bb 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp
> opcode,
>           }
>
>           val->constant->values[0] =
> -            nir_eval_const_opcode(op, num_components, bit_size, src);
> +            nir_eval_const_opcode(op, num_components, bit_size, src,
> +
> b->shader->info.shader_float_controls_execution_mode);
>           break;
>        } /* default */
>        }
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190130/fb03044f/attachment-0001.html>