[Mesa-dev] [PATCH v2 04/29] nir: add support for flushing to zero denorm constants

Thu Jan 31 13:07:31 UTC 2019

On 30/01/2019 16:18, Connor Abbott wrote:
> 
> 
> On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez
> <siglesias at igalia.com <mailto:siglesias at igalia.com>> wrote:
> 
>     v2:
>     - Refactor conditions and shared function (Connor)
>     - Move code to nir_eval_const_opcode() (Connor)
>     - Don't flush to zero on fquantize2f16
>       From Vulkan spec, VK_KHR_shader_float_controls section:
> 
>       "3) Do denorm and rounding mode controls apply to OpSpecConstantOp?
> 
>       RESOLVED: Yes, except when the opcode is OpQuantizeToF16."
> 
>     Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com
>     <mailto:siglesias at igalia.com>>
>     ---
>      src/compiler/nir/nir_constant_expressions.h  |  3 +-
>      src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++--
>      src/compiler/nir/nir_loop_analyze.c          |  7 ++-
>      src/compiler/nir/nir_opt_constant_folding.c  | 15 ++---
>      src/compiler/spirv/spirv_to_nir.c            |  3 +-
>      5 files changed, 75 insertions(+), 18 deletions(-)
> 
>     diff --git a/src/compiler/nir/nir_constant_expressions.h
>     b/src/compiler/nir/nir_constant_expressions.h
>     index 1d6bbbc25d3..a2d416abc45 100644
>     --- a/src/compiler/nir/nir_constant_expressions.h
>     +++ b/src/compiler/nir/nir_constant_expressions.h
>     @@ -31,6 +31,7 @@
>      #include "nir.h"
> 
>      nir_const_value nir_eval_const_opcode(nir_op op, unsigned
>     num_components,
>     -                                      unsigned bit_size,
>     nir_const_value *src);
>     +                                      unsigned bit_size,
>     nir_const_value *src,
>     +                                      unsigned
>     float_controls_execution_mode);
> 
>      #endif /* NIR_CONSTANT_EXPRESSIONS_H */
>     diff --git a/src/compiler/nir/nir_constant_expressions.py
>     b/src/compiler/nir/nir_constant_expressions.py
>     index 505cdd8baae..dc2132df0d0 100644
>     --- a/src/compiler/nir/nir_constant_expressions.py
>     +++ b/src/compiler/nir/nir_constant_expressions.py
>     @@ -66,6 +66,37 @@ template = """\
>      #include "util/bigmath.h"
>      #include "nir_constant_expressions.h"
> 
>     +/**
>     + * Checks if the provided value is a denorm and flushes it to zero.
>     +*/
>     +static nir_const_value
>     +constant_denorm_flush_to_zero(nir_const_value value, unsigned
>     index, unsigned bit_size)
>     +{
>     +   switch(bit_size) {
>     +   case 64:
>     +      if (value.u64[index] < 0x0010000000000000)
>     +         value.u64[index] = 0;
>     +      if (value.u64[index] & 0x8000000000000000 &&
>     +          !(value.u64[index] & 0x7ff0000000000000))
>     +         value.u64[index] = 0x8000000000000000;
>     +      break;
>     +   case 32:
>     +      if (value.u32[index] < 0x00800000)
>     +         value.u32[index] = 0;
>     +      if (value.u32[index] & 0x80000000 &&
>     +          !(value.u32[index] & 0x7f800000))
>     +         value.u32[index] = 0x80000000;
>     +      break;
>     +   case 16:
>     +      if (value.u16[index] < 0x0400)
>     +         value.u16[index] = 0;
>     +      if (value.u16[index] & 0x8000 &&
>     +          !(value.u16[index] & 0x7c00))
>     +         value.u16[index] = 0x8000;
>     +   }
>     +   return value;
>     +}
>     +
>      /**
>       * Evaluate one component of packSnorm4x8.
>       */
>     @@ -260,7 +291,7 @@ struct ${type}${width}_vec {
>      % endfor
>      % endfor
> 
>     -<%def name="evaluate_op(op, bit_size)">
>     +<%def name="evaluate_op(op, bit_size, execution_mode)">
>         <%
>         output_type = type_add_size(op.output_type, bit_size)
>         input_types = [type_add_size(type_, bit_size) for type_ in
>     op.input_types]
>     @@ -277,6 +308,14 @@ struct ${type}${width}_vec {
>               <% continue %>
>            %endif
> 
>     +      % for k in range(op.input_sizes[j]):
>     +         % if op.name <http://op.name> != "fquantize2f16" and
>     bit_size > 8 and op.input_types[j] == "float":
> 
> 
> This condition doesn't seem right. It may happen to work, but it isn't
> following NIR principles on ALU instructions. Each NIR opcode has an
> output type (given by op.output_type) and input types for each source
> (given by op.input_types). Each type may be sized, like "float32", or
> unsized like "float". All unsized inputs/outputs must have the same bit
> size at runtime. The bit_size here is the common bitsize for
> inputs/outputs with unsized types like "float" in the opcode definition.
> Even though in general it's only known at runtime, we're switching over
> all bitsizes in the generated code, so here we do know what it is at
> compile time. In order to handle sized types, we have to drop all
> references to bit_size and stop comparing op.input_types[j] directly
> with "float" since it may be a sized type. Also, if this source has a
> float type, we already know that its bit size is at least 16, so the
> second check should be useless. I think what you actually want to do is
> extract the base type, i.e. something like: op.name <http://op.name> !=
> "fquantize16" and type_base_name(input_types[j]) == "float"
>  

Right. Thanks for the explanation.

> 
>     +            if (execution_mode &
>     SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
> 
>  
> bit_size here isn't the right bit size for sized sources. You want
> ${type_size(input_types[i])} which is the actual size of the source at
> runtime. This will be bit_size if the op.input_types[j] is unsized or
> the input size otherwise.
> 
>     +               _src[${j}] =
>     constant_denorm_flush_to_zero(_src[${j}], ${k}, bit_size);
> 
> 
> same as above with bit_size.
> 

OK, I will do them for the dst ones.

> Finally, I think you missed the equivalent hunk for non-per-component
> sources.
>  

Actually, I realized that this is not mandatory for the sources, just
the destination, so I will remove the source part all together and do
the suggested fixes into the destination part.

> 
>     +            }
>     +         % endif
>     +      % endfor
>     +
>            const struct ${input_types[j]}_vec src${j} = {
>            % for k in range(op.input_sizes[j]):
>               % if input_types[j] == "int1":
>     @@ -343,6 +382,12 @@ struct ${type}${width}_vec {
>               % else:
>                  _dst_val.${get_const_field(output_type)}[_i] = dst;
>               % endif
>     +
>     +         % if op.name <http://op.name> != "fquantize2f16" and
>     bit_size > 8 and op.output_type == "float":
>     +            if (execution_mode &
>     SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
>     +               _dst_val = constant_denorm_flush_to_zero(_dst_val,
>     _i, bit_size);
>     +            }
>     +         % endif
> 
>            }
>         % else:
>            ## In the non-per-component case, create a struct dst with
>     @@ -375,6 +420,12 @@ struct ${type}${width}_vec {
>               % else:
>                  _dst_val.${get_const_field(output_type)}[${k}] =
>     dst.${"xyzw"[k]};
>               % endif
>     +
>     +         % if op.name <http://op.name> != "fquantize2f16" and
>     bit_size > 8 and op.output_type == "float":
>     +            if (execution_mode &
>     SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
>     +               _dst_val = constant_denorm_flush_to_zero(_dst_val,
>     ${k}, bit_size);
>     +            }
>     +         % endif
>            % endfor
>         % endif
>      </%def>
>     @@ -383,7 +434,8 @@ struct ${type}${width}_vec {
>      static nir_const_value
>      evaluate_${name}(MAYBE_UNUSED unsigned num_components,
>                       ${"UNUSED" if op_bit_sizes(op) is None else ""}
>     unsigned bit_size,
>     -                 MAYBE_UNUSED nir_const_value *_src)
>     +                 MAYBE_UNUSED nir_const_value *_src,
>     +                 MAYBE_UNUSED unsigned execution_mode)
>      {
>         nir_const_value _dst_val = { {0, } };
> 
>     @@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned
>     num_components,
>            switch (bit_size) {
>            % for bit_size in op_bit_sizes(op):
>            case ${bit_size}: {
>     -         ${evaluate_op(op, bit_size)}
>     +         ${evaluate_op(op, bit_size, execution_mode)}
>               break;
>            }
>            % endfor
>     @@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned
>     num_components,
>               unreachable("unknown bit width");
>            }
>         % else:
>     -      ${evaluate_op(op, 0)}
>     +      ${evaluate_op(op, 0, execution_mode)}
>         % endif
> 
>         return _dst_val;
>     @@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned
>     num_components,
> 
>      nir_const_value
>      nir_eval_const_opcode(nir_op op, unsigned num_components,
>     -                      unsigned bit_width, nir_const_value *src)
>     +                      unsigned bit_width, nir_const_value *src,
>     +                      unsigned float_controls_execution_mode)
>      {
>         switch (op) {
>      % for name in sorted(opcodes.keys()):
>         case nir_op_${name}:
>     -      return evaluate_${name}(num_components, bit_width, src);
>     +      return evaluate_${name}(num_components, bit_width, src,
>     float_controls_execution_mode);
>      % endfor
>         default:
>            unreachable("shouldn't get here");
>     diff --git a/src/compiler/nir/nir_loop_analyze.c
>     b/src/compiler/nir/nir_loop_analyze.c
>     index 259f02a854e..c9fba8649db 100644
>     --- a/src/compiler/nir/nir_loop_analyze.c
>     +++ b/src/compiler/nir/nir_loop_analyze.c
>     @@ -497,19 +497,20 @@ test_iterations(int32_t iter_int,
>     nir_const_value *step,
>          */
>         nir_const_value mul_src[2] = { iter_src, *step };
>         nir_const_value mul_result =
>     -      nir_eval_const_opcode(mul_op, 1, bit_size, mul_src);
>     +      nir_eval_const_opcode(mul_op, 1, bit_size, mul_src,
>     SHADER_DEFAULT_FLOAT_CONTROL_MODE);
> 
> 
> Shouldn't we be using the actual float control mode for the shader? I
> don't know much about this code, but presumably it's trying to simulate
> something calculated by the shader.
>  

Good catch! I'll fix it.

Sam

> 
> 
>         /* Add the initial value to the accumulated induction variable
>     total */
>         nir_const_value add_src[2] = { mul_result, *initial };
>         nir_const_value add_result =
>     -      nir_eval_const_opcode(add_op, 1, bit_size, add_src);
>     +      nir_eval_const_opcode(add_op, 1, bit_size, add_src,
>     SHADER_DEFAULT_FLOAT_CONTROL_MODE);
> 
>         nir_const_value src[2] = { { {0, } }, { {0, } } };
>         src[limit_rhs ? 0 : 1] = add_result;
>         src[limit_rhs ? 1 : 0] = *limit;
> 
>         /* Evaluate the loop exit condition */
>     -   nir_const_value result = nir_eval_const_opcode(cond_op, 1,
>     bit_size, src);
>     +   nir_const_value result = nir_eval_const_opcode(cond_op, 1,
>     bit_size, src,
>     +                                                 
>     SHADER_DEFAULT_FLOAT_CONTROL_MODE);
> 
>         return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0);
>      }
>     diff --git a/src/compiler/nir/nir_opt_constant_folding.c
>     b/src/compiler/nir/nir_opt_constant_folding.c
>     index 5097a3bcc36..bd6130d5b33 100644
>     --- a/src/compiler/nir/nir_opt_constant_folding.c
>     +++ b/src/compiler/nir/nir_opt_constant_folding.c
>     @@ -39,7 +39,7 @@ struct constant_fold_state {
>      };
> 
>      static bool
>     -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
>     +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx,
>     unsigned execution_mode)
>      {
>         nir_const_value src[NIR_MAX_VEC_COMPONENTS];
> 
>     @@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr,
>     void *mem_ctx)
> 
>         nir_const_value dest =
>            nir_eval_const_opcode(instr->op,
>     instr->dest.dest.ssa.num_components,
>     -                            bit_size, src);
>     +                            bit_size, src, execution_mode);
> 
>         nir_load_const_instr *new_instr =
>            nir_load_const_instr_create(mem_ctx,
>     @@ -161,14 +161,14 @@
>     constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
>      }
> 
>      static bool
>     -constant_fold_block(nir_block *block, void *mem_ctx)
>     +constant_fold_block(nir_block *block, void *mem_ctx, unsigned
>     execution_mode)
>      {
>         bool progress = false;
> 
>         nir_foreach_instr_safe(instr, block) {
>            switch (instr->type) {
>            case nir_instr_type_alu:
>     -         progress |=
>     constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
>     +         progress |=
>     constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx,
>     execution_mode);
>               break;
>            case nir_instr_type_intrinsic:
>               progress |=
>     @@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void
>     *mem_ctx)
>      }
> 
>      static bool
>     -nir_opt_constant_folding_impl(nir_function_impl *impl)
>     +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned
>     execution_mode)
>      {
>         void *mem_ctx = ralloc_parent(impl);
>         bool progress = false;
> 
>         nir_foreach_block(block, impl) {
>     -      progress |= constant_fold_block(block, mem_ctx);
>     +      progress |= constant_fold_block(block, mem_ctx, execution_mode);
>         }
> 
>         if (progress)
>     @@ -204,10 +204,11 @@ bool
>      nir_opt_constant_folding(nir_shader *shader)
>      {
>         bool progress = false;
>     +   unsigned execution_mode =
>     shader->info.shader_float_controls_execution_mode;
> 
>         nir_foreach_function(function, shader) {
>            if (function->impl)
>     -         progress |= nir_opt_constant_folding_impl(function->impl);
>     +         progress |= nir_opt_constant_folding_impl(function->impl,
>     execution_mode);
>         }
> 
>         return progress;
>     diff --git a/src/compiler/spirv/spirv_to_nir.c
>     b/src/compiler/spirv/spirv_to_nir.c
>     index 96d4d80970f..7578a83e2bb 100644
>     --- a/src/compiler/spirv/spirv_to_nir.c
>     +++ b/src/compiler/spirv/spirv_to_nir.c
>     @@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b,
>     SpvOp opcode,
>               }
> 
>               val->constant->values[0] =
>     -            nir_eval_const_opcode(op, num_components, bit_size, src);
>     +            nir_eval_const_opcode(op, num_components, bit_size, src,
>     +                                 
>     b->shader->info.shader_float_controls_execution_mode);
>               break;
>            } /* default */
>            }
>     -- 
>     2.19.1
> 
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190131/8ae9e193/attachment-0001.sig>