[Mesa-dev] [PATCH 14/28] nir: fix denorms in unpack_half_1x16()

Wed Dec 5 15:55:29 UTC 2018

According to VK_KHR_shader_float_controls:

"Denormalized values obtained via unpacking an integer into a vector
 of values with smaller bit width and interpreting those values as
 floating-point numbers must be flushed to zero, unless the entry point
 is declared with the DenormPreserve execution mode."

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/compiler/nir/nir_constant_expressions.py | 13 +++++++++++++
 src/compiler/nir/nir_lower_alu_to_scalar.c   | 10 ++++++++--
 src/compiler/nir/nir_opcodes.py              |  5 +++++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
index a9af1bd233d..bc60a08da28 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -245,6 +245,19 @@ pack_half_1x16(float x)
    return _mesa_float_to_half(x);
 }
 
+/**
+ * Evaluate one component of unpackHalf2x16, flushing denorms to zero.
+ */
+static float
+unpack_half_1x16_flush_to_zero(uint16_t u)
+{
+   if (u < 0x0400)
+      u = 0;
+   if (u & 0x8000 && !(u & 0x7c00))
+      u = 0x8000;
+   return _mesa_half_to_float(u);
+}
+
 /**
  * Evaluate one component of unpackHalf2x16.
  */
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 7ef032cd164..d80cf2504c7 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -133,8 +133,14 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
       nir_ssa_def *packed = nir_ssa_for_alu_src(b, instr, 0);
 
       nir_ssa_def *comps[2];
-      comps[0] = nir_unpack_half_2x16_split_x(b, packed);
-      comps[1] = nir_unpack_half_2x16_split_y(b, packed);
+
+      if (b->shader->info.shader_float_controls_execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16) {
+         comps[0] = nir_unpack_half_2x16_split_x_flush_to_zero(b, packed);
+         comps[1] = nir_unpack_half_2x16_split_y_flush_to_zero(b, packed);
+      } else {
+         comps[0] = nir_unpack_half_2x16_split_x(b, packed);
+         comps[1] = nir_unpack_half_2x16_split_y(b, packed);
+      }
       nir_ssa_def *vec = nir_vec(b, comps, 2);
 
       nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index eb554a66b44..191025f6932 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -309,6 +309,11 @@ unop_convert("unpack_half_2x16_split_x", tfloat32, tuint32,
 unop_convert("unpack_half_2x16_split_y", tfloat32, tuint32,
              "unpack_half_1x16((uint16_t)(src0 >> 16))")
 
+unop_convert("unpack_half_2x16_split_x_flush_to_zero", tfloat32, tuint32,
+             "unpack_half_1x16_flush_to_zero((uint16_t)(src0 & 0xffff))")
+unop_convert("unpack_half_2x16_split_y_flush_to_zero", tfloat32, tuint32,
+             "unpack_half_1x16_flush_to_zero((uint16_t)(src0 >> 16))")
+
 unop_convert("unpack_32_2x16_split_x", tuint16, tuint32, "src0")
 unop_convert("unpack_32_2x16_split_y", tuint16, tuint32, "src0 >> 16")
 
-- 
2.19.1