[Mesa-dev] [PATCH v2 04/29] nir: add support for flushing denorm constants to zero

Tue Dec 18 10:33:59 UTC 2018

v2:
- Refactor conditions and shared function (Connor)
- Move code to nir_eval_const_opcode() (Connor)
- Don't flush to zero on fquantize2f16.
  From the Vulkan spec, VK_KHR_shader_float_controls section:

  "3) Do denorm and rounding mode controls apply to OpSpecConstantOp?

  RESOLVED: Yes, except when the opcode is OpQuantizeToF16."

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/compiler/nir/nir_constant_expressions.h  |  3 +-
 src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++--
 src/compiler/nir/nir_loop_analyze.c          |  7 ++-
 src/compiler/nir/nir_opt_constant_folding.c  | 15 ++---
 src/compiler/spirv/spirv_to_nir.c            |  3 +-
 5 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h
index 1d6bbbc25d3..a2d416abc45 100644
--- a/src/compiler/nir/nir_constant_expressions.h
+++ b/src/compiler/nir/nir_constant_expressions.h
@@ -31,6 +31,7 @@
 #include "nir.h"
 
 nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
-                                      unsigned bit_size, nir_const_value *src);
+                                      unsigned bit_size, nir_const_value *src,
+                                      unsigned float_controls_execution_mode);
 
 #endif /* NIR_CONSTANT_EXPRESSIONS_H */
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
index 505cdd8baae..dc2132df0d0 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -66,6 +66,37 @@ template = """\
 #include "util/bigmath.h"
 #include "nir_constant_expressions.h"
 
+/**
+ * Flush one component of a constant to zero if it is a denormal.
+ *
+ * A float whose exponent field is all zeros is either a zero or a
+ * denormal; masking it down to its sign bit turns it into a zero of the
+ * same sign and leaves existing zeros as they are. Bit sizes other than
+ * 16, 32 and 64 are returned unchanged.
+ */
+static nir_const_value
+constant_denorm_flush_to_zero(nir_const_value value, unsigned index, unsigned bit_size)
+{
+   switch (bit_size) {
+   case 16:
+      if ((value.u16[index] & 0x7c00) == 0)
+         value.u16[index] &= 0x8000;
+      break;
+   case 32:
+      if ((value.u32[index] & 0x7f800000) == 0)
+         value.u32[index] &= 0x80000000;
+      break;
+   case 64:
+      if ((value.u64[index] & 0x7ff0000000000000) == 0)
+         value.u64[index] &= 0x8000000000000000;
+      break;
+   default:
+      break;
+   }
+
+   return value;
+}
+
 /**
  * Evaluate one component of packSnorm4x8.
  */
@@ -260,7 +291,7 @@ struct ${type}${width}_vec {
 % endfor
 % endfor
 
-<%def name="evaluate_op(op, bit_size)">
+<%def name="evaluate_op(op, bit_size, execution_mode)">
    <%
    output_type = type_add_size(op.output_type, bit_size)
    input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
@@ -277,6 +308,14 @@ struct ${type}${width}_vec {
          <% continue %>
       %endif
 
+      % for k in range(op.input_sizes[j]):
+         % if op.name != "fquantize2f16" and bit_size > 8 and op.input_types[j] == "float":
+            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+               _src[${j}] = constant_denorm_flush_to_zero(_src[${j}], ${k}, bit_size);
+            }
+         % endif
+      % endfor
+
       const struct ${input_types[j]}_vec src${j} = {
       % for k in range(op.input_sizes[j]):
          % if input_types[j] == "int1":
@@ -343,6 +382,12 @@ struct ${type}${width}_vec {
          % else:
             _dst_val.${get_const_field(output_type)}[_i] = dst;
          % endif
+
+         % if op.name != "fquantize2f16" and bit_size > 8 and op.output_type == "float":
+            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+               _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, bit_size);
+            }
+         % endif
       }
    % else:
       ## In the non-per-component case, create a struct dst with
@@ -375,6 +420,12 @@ struct ${type}${width}_vec {
          % else:
             _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
          % endif
+
+         % if op.name != "fquantize2f16" and bit_size > 8 and op.output_type == "float":
+            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+               _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, bit_size);
+            }
+         % endif
       % endfor
    % endif
 </%def>
@@ -383,7 +434,8 @@ struct ${type}${width}_vec {
 static nir_const_value
 evaluate_${name}(MAYBE_UNUSED unsigned num_components,
                  ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
-                 MAYBE_UNUSED nir_const_value *_src)
+                 MAYBE_UNUSED nir_const_value *_src,
+                 MAYBE_UNUSED unsigned execution_mode)
 {
    nir_const_value _dst_val = { {0, } };
 
@@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
       switch (bit_size) {
       % for bit_size in op_bit_sizes(op):
       case ${bit_size}: {
-         ${evaluate_op(op, bit_size)}
+         ${evaluate_op(op, bit_size, execution_mode)}
          break;
       }
       % endfor
@@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
          unreachable("unknown bit width");
       }
    % else:
-      ${evaluate_op(op, 0)}
+      ${evaluate_op(op, 0, execution_mode)}
    % endif
 
    return _dst_val;
@@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
 
 nir_const_value
 nir_eval_const_opcode(nir_op op, unsigned num_components,
-                      unsigned bit_width, nir_const_value *src)
+                      unsigned bit_width, nir_const_value *src,
+                      unsigned float_controls_execution_mode)
 {
    switch (op) {
 % for name in sorted(opcodes.keys()):
    case nir_op_${name}:
-      return evaluate_${name}(num_components, bit_width, src);
+      return evaluate_${name}(num_components, bit_width, src, float_controls_execution_mode);
 % endfor
    default:
       unreachable("shouldn't get here");
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 259f02a854e..c9fba8649db 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -497,19 +497,20 @@ test_iterations(int32_t iter_int, nir_const_value *step,
     */
    nir_const_value mul_src[2] = { iter_src, *step };
    nir_const_value mul_result =
-      nir_eval_const_opcode(mul_op, 1, bit_size, mul_src);
+      nir_eval_const_opcode(mul_op, 1, bit_size, mul_src, SHADER_DEFAULT_FLOAT_CONTROL_MODE);
 
    /* Add the initial value to the accumulated induction variable total */
    nir_const_value add_src[2] = { mul_result, *initial };
    nir_const_value add_result =
-      nir_eval_const_opcode(add_op, 1, bit_size, add_src);
+      nir_eval_const_opcode(add_op, 1, bit_size, add_src, SHADER_DEFAULT_FLOAT_CONTROL_MODE);
 
    nir_const_value src[2] = { { {0, } }, { {0, } } };
    src[limit_rhs ? 0 : 1] = add_result;
    src[limit_rhs ? 1 : 0] = *limit;
 
    /* Evaluate the loop exit condition */
-   nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src);
+   nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src,
+                                                  SHADER_DEFAULT_FLOAT_CONTROL_MODE);
 
    return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0);
 }
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index 5097a3bcc36..bd6130d5b33 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -39,7 +39,7 @@ struct constant_fold_state {
 };
 
 static bool
-constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode)
 {
    nir_const_value src[NIR_MAX_VEC_COMPONENTS];
 
@@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
 
    nir_const_value dest =
       nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
-                            bit_size, src);
+                            bit_size, src, execution_mode);
 
    nir_load_const_instr *new_instr =
       nir_load_const_instr_create(mem_ctx,
@@ -161,14 +161,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
 }
 
 static bool
-constant_fold_block(nir_block *block, void *mem_ctx)
+constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode)
 {
    bool progress = false;
 
    nir_foreach_instr_safe(instr, block) {
       switch (instr->type) {
       case nir_instr_type_alu:
-         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
+         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode);
          break;
       case nir_instr_type_intrinsic:
          progress |=
@@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void *mem_ctx)
 }
 
 static bool
-nir_opt_constant_folding_impl(nir_function_impl *impl)
+nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode)
 {
    void *mem_ctx = ralloc_parent(impl);
    bool progress = false;
 
    nir_foreach_block(block, impl) {
-      progress |= constant_fold_block(block, mem_ctx);
+      progress |= constant_fold_block(block, mem_ctx, execution_mode);
    }
 
    if (progress)
@@ -204,10 +204,11 @@ bool
 nir_opt_constant_folding(nir_shader *shader)
 {
    bool progress = false;
+   unsigned execution_mode = shader->info.shader_float_controls_execution_mode;
 
    nir_foreach_function(function, shader) {
       if (function->impl)
-         progress |= nir_opt_constant_folding_impl(function->impl);
+         progress |= nir_opt_constant_folding_impl(function->impl, execution_mode);
    }
 
    return progress;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 96d4d80970f..7578a83e2bb 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
          }
 
          val->constant->values[0] =
-            nir_eval_const_opcode(op, num_components, bit_size, src);
+            nir_eval_const_opcode(op, num_components, bit_size, src,
+                                  b->shader->info.shader_float_controls_execution_mode);
          break;
       } /* default */
       }
-- 
2.19.1