Mesa (master): nir: port fp16 casting code from dxil

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Mar 22 02:36:45 UTC 2021


Module: Mesa
Branch: master
Commit: 01dfd65a2d407dd95ac472e7e80e736bc7a8fc57
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=01dfd65a2d407dd95ac472e7e80e736bc7a8fc57

Author: Dave Airlie <airlied at redhat.com>
Date:   Tue Dec 29 12:43:11 2020 +1000

nir: port fp16 casting code from dxil

This moves the dxil pass to common code and makes dxil
use the new code.

Acked-by: Adam Jackson <ajax at redhat.com>
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
Reviewed-by: Jesse Natalie <jenatali at microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9643>

---

 src/compiler/nir/meson.build           |   1 +
 src/compiler/nir/nir.h                 |   1 +
 src/compiler/nir/nir_lower_fp16_conv.c | 235 +++++++++++++++++++++++++++++++++
 src/microsoft/clc/clc_compiler.c       |   2 +-
 src/microsoft/compiler/dxil_nir.c      | 217 ------------------------------
 5 files changed, 238 insertions(+), 218 deletions(-)

diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index b82fe641c73..f45a9c17b43 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -147,6 +147,7 @@ files_libnir = files(
   'nir_lower_fb_read.c',
   'nir_lower_flatshade.c',
   'nir_lower_flrp.c',
+  'nir_lower_fp16_conv.c',
   'nir_lower_fragcoord_wtrans.c',
   'nir_lower_fragcolor.c',
   'nir_lower_frexp.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4a4309947cf..3620dde1a30 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5004,6 +5004,7 @@ bool nir_shader_uses_view_index(nir_shader *shader);
 bool nir_can_lower_multiview(nir_shader *shader);
 bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
 
+bool nir_lower_fp16_casts(nir_shader *shader);
 bool nir_normalize_cubemap_coords(nir_shader *shader);
 
 void nir_live_ssa_defs_impl(nir_function_impl *impl);
diff --git a/src/compiler/nir/nir_lower_fp16_conv.c b/src/compiler/nir/nir_lower_fp16_conv.c
new file mode 100644
index 00000000000..4ff3cb74d4b
--- /dev/null
+++ b/src/compiler/nir/nir_lower_fp16_conv.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright © Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir_builder.h"
+
+/* The following float-to-half conversion routines are based on the "half" library:
+ * https://sourceforge.net/projects/half/
+ *
+ * half - IEEE 754-based half-precision floating-point library.
+ *
+ * Copyright (c) 2012-2019 Christian Rau <rauy at users.sourceforge.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Version 2.1.0
+ */
+static bool
+lower_fp16_casts_filter(const nir_instr *instr, const void *data)
+{
+   /* Match f2f16 ALU ops and convert_alu_types intrinsics producing float16. */
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      const nir_op op = nir_instr_as_alu(instr)->op;
+      return op == nir_op_f2f16 || op == nir_op_f2f16_rtne ||
+             op == nir_op_f2f16_rtz;
+   }
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *convert = nir_instr_as_intrinsic(instr);
+      if (convert->intrinsic != nir_intrinsic_convert_alu_types)
+         return false;
+      return nir_intrinsic_dest_type(convert) == nir_type_float16;
+   }
+   default:
+      return false;
+   }
+}
+
+static nir_ssa_def *
+half_rounded(nir_builder *b, nir_ssa_def *value, nir_ssa_def *guard, nir_ssa_def *sticky,
+             nir_ssa_def *sign, nir_rounding_mode mode)
+{
+   /* Integer increment (0 or 1) applied to the truncated fp16 bit pattern. */
+   nir_ssa_def *bump;
+   if (mode == nir_rounding_mode_rtne) {
+      /* Round half to even: bump on guard when sticky or the LSB of value is set. */
+      bump = nir_iand(b, guard, nir_ior(b, sticky, value));
+   } else if (mode == nir_rounding_mode_ru || mode == nir_rounding_mode_rd) {
+      /* Directed: ru bumps inexact non-negative inputs, rd inexact negative ones. */
+      nir_ssa_def *neg = nir_ushr(b, sign, nir_imm_int(b, 31));
+      nir_ssa_def *dir = mode == nir_rounding_mode_ru ? nir_inot(b, neg) : neg;
+      bump = nir_iand(b, dir, nir_ior(b, guard, sticky));
+   } else {
+      return value; /* Every other mode keeps the truncated value unchanged. */
+   }
+   return nir_iadd(b, value, bump);
+}
+
+/* Emits integer NIR that converts the float32 bit pattern in src to fp16 bits
+ * using rounding mode `mode`; returns a 16-bit def that includes the sign bit.
+ */
+static nir_ssa_def *
+float_to_half_impl(nir_builder *b, nir_ssa_def *src, nir_rounding_mode mode)
+{
+   nir_ssa_def *f32infinity = nir_imm_int(b, 255 << 23);
+   nir_ssa_def *f16max = nir_imm_int(b, (127 + 16) << 23);
+   nir_ssa_def *sign = nir_iand(b, src, nir_imm_int(b, 0x80000000));
+   nir_ssa_def *one = nir_imm_int(b, 1);
+   /* Emitted before any nir_push_if() so it dominates every branch and phi use. */
+   nir_ssa_def *zero = nir_imm_int(b, 0);
+
+   nir_ssa_def *abs = nir_iand(b, src, nir_imm_int(b, 0x7FFFFFFF));
+   /* NaN or INF. For rtne, overflow also becomes INF, so combine the comparisons */
+   nir_push_if(b, nir_ige(b, abs, mode == nir_rounding_mode_rtne ? f16max : f32infinity));
+   nir_ssa_def *inf_nanfp16 = nir_bcsel(b, nir_ilt(b, f32infinity, abs),
+                                        nir_imm_int(b, 0x7E00), nir_imm_int(b, 0x7C00));
+   nir_push_else(b, NULL);
+
+   nir_ssa_def *overflowed_fp16 = NULL;
+   if (mode != nir_rounding_mode_rtne) {
+      /* Handle overflow */
+      nir_push_if(b, nir_ige(b, abs, f16max));
+      switch (mode) {
+      case nir_rounding_mode_rtz:
+         overflowed_fp16 = nir_imm_int(b, 0x7BFF);
+         break;
+      case nir_rounding_mode_ru:
+         /* Negative becomes max float, positive becomes inf */
+         overflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), nir_imm_int(b, 0x7BFF), nir_imm_int(b, 0x7C00));
+         break;
+      case nir_rounding_mode_rd:
+         /* Negative becomes inf, positive becomes max float */
+         overflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), nir_imm_int(b, 0x7C00), nir_imm_int(b, 0x7BFF));
+         break;
+      default: unreachable("Should've been handled already");
+      }
+      nir_push_else(b, NULL);
+   }
+
+   nir_push_if(b, nir_ige(b, abs, nir_imm_int(b, 113 << 23)));
+   /* FP16 will be normal */
+   nir_ssa_def *value = nir_ior(b,
+                                nir_ishl(b,
+                                         nir_isub(b,
+                                                  nir_ushr(b, abs, nir_imm_int(b, 23)),
+                                                  nir_imm_int(b, 112)),
+                                         nir_imm_int(b, 10)),
+                                nir_iand(b, nir_ushr(b, abs, nir_imm_int(b, 13)), nir_imm_int(b, 0x3FFF)));
+   nir_ssa_def *guard = nir_iand(b, nir_ushr(b, abs, nir_imm_int(b, 12)), one);
+   nir_ssa_def *sticky = nir_bcsel(b, nir_ine(b, nir_iand(b, abs, nir_imm_int(b, 0xFFF)), zero), one, zero);
+   nir_ssa_def *normal_fp16 = half_rounded(b, value, guard, sticky, sign, mode);
+
+   nir_push_else(b, NULL);
+   nir_push_if(b, nir_ige(b, abs, nir_imm_int(b, 102 << 23)));
+
+   /* FP16 will be denormal */
+   nir_ssa_def *i = nir_isub(b, nir_imm_int(b, 125), nir_ushr(b, abs, nir_imm_int(b, 23)));
+   nir_ssa_def *masked = nir_ior(b, nir_iand(b, abs, nir_imm_int(b, 0x7FFFFF)), nir_imm_int(b, 0x800000));
+   value = nir_ushr(b, masked, nir_iadd(b, i, one));
+   guard = nir_iand(b, nir_ushr(b, masked, i), one);
+   sticky = nir_bcsel(b, nir_ine(b, nir_iand(b, masked, nir_isub(b, nir_ishl(b, one, i), one)), zero), one, zero);
+   nir_ssa_def *denormal_fp16 = half_rounded(b, value, guard, sticky, sign, mode);
+
+   nir_push_else(b, NULL);
+
+   /* Handle underflow. Nonzero values need to shift up or down for round-up or round-down */
+   nir_ssa_def *underflowed_fp16 = zero;
+   if (mode == nir_rounding_mode_ru || mode == nir_rounding_mode_rd) {
+      nir_push_if(b, nir_i2b1(b, abs));
+
+      if (mode == nir_rounding_mode_ru)
+         underflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), zero, one);
+      else
+         underflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), one, zero);
+
+      nir_push_else(b, NULL);
+      nir_pop_if(b, NULL);
+      underflowed_fp16 = nir_if_phi(b, underflowed_fp16, zero);
+   }
+
+   nir_pop_if(b, NULL);
+   nir_ssa_def *underflowed_or_denorm_fp16 = nir_if_phi(b, denormal_fp16, underflowed_fp16);
+
+   nir_pop_if(b, NULL);
+   nir_ssa_def *finite_fp16 = nir_if_phi(b, normal_fp16, underflowed_or_denorm_fp16);
+
+   nir_ssa_def *finite_or_overflowed_fp16 = finite_fp16;
+   if (mode != nir_rounding_mode_rtne) {
+      nir_pop_if(b, NULL);
+      finite_or_overflowed_fp16 = nir_if_phi(b, overflowed_fp16, finite_fp16);
+   }
+
+   nir_pop_if(b, NULL);
+   nir_ssa_def *fp16 = nir_if_phi(b, inf_nanfp16, finite_or_overflowed_fp16);
+
+   return nir_u2u16(b, nir_ior(b, fp16, nir_ushr(b, sign, nir_imm_int(b, 16))));
+}
+
+static nir_ssa_def *
+lower_fp16_cast_impl(nir_builder *b, nir_instr *instr, void *data)
+{
+   /* Gather the 32-bit source, the destination def and the rounding mode. */
+   nir_rounding_mode mode;
+   nir_ssa_def *src, *dst;
+   const uint8_t *swizzle;
+
+   if (instr->type != nir_instr_type_alu) {
+      /* Intrinsic path: the rounding mode is read from it; no swizzle applies. */
+      nir_intrinsic_instr *convert = nir_instr_as_intrinsic(instr);
+      assert(nir_intrinsic_src_type(convert) == nir_type_float32);
+      swizzle = NULL;
+      src = convert->src[0].ssa;
+      dst = &convert->dest.ssa;
+      mode = nir_intrinsic_rounding_mode(convert);
+   } else {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      swizzle = alu->src[0].swizzle;
+      src = alu->src[0].src.ssa;
+      dst = &alu->dest.dest.ssa;
+      assert(src->bit_size == 32);
+      if (alu->op == nir_op_f2f16_rtz)
+         mode = nir_rounding_mode_rtz;
+      else if (alu->op == nir_op_f2f16 || alu->op == nir_op_f2f16_rtne)
+         mode = nir_rounding_mode_rtne;
+      else
+         unreachable("Should've been filtered");
+   }
+
+   /* Lower one channel at a time, then rebuild a vector of the same width. */
+   const unsigned num_comps = dst->num_components;
+   nir_ssa_def *halves[NIR_MAX_VEC_COMPONENTS] = { NULL };
+   for (unsigned c = 0; c < num_comps; c++) {
+      nir_ssa_def *comp = nir_channel(b, src, swizzle ? swizzle[c] : c);
+      halves[c] = float_to_half_impl(b, comp, mode);
+   }
+   return nir_vec(b, halves, num_comps);
+}
+
+bool
+nir_lower_fp16_casts(nir_shader *shader)
+{
+   /* Rewrite each matched float32 -> fp16 conversion as explicit integer math. */
+   bool progress = nir_shader_lower_instructions(shader, lower_fp16_casts_filter,
+                                                 lower_fp16_cast_impl, NULL);
+   return progress;
+}
diff --git a/src/microsoft/clc/clc_compiler.c b/src/microsoft/clc/clc_compiler.c
index 632c60e983b..dbf6b037cfa 100644
--- a/src/microsoft/clc/clc_compiler.c
+++ b/src/microsoft/clc/clc_compiler.c
@@ -1361,7 +1361,7 @@ clc_to_dxil(struct clc_context *ctx,
    NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
    NIR_PASS_V(nir, dxil_nir_opt_alu_deref_srcs);
    NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
-   NIR_PASS_V(nir, dxil_nir_lower_fp16_casts);
+   NIR_PASS_V(nir, nir_lower_fp16_casts);
    NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);
 
    // Convert pack to pack_split
diff --git a/src/microsoft/compiler/dxil_nir.c b/src/microsoft/compiler/dxil_nir.c
index 599cb5be659..1fab19a8f67 100644
--- a/src/microsoft/compiler/dxil_nir.c
+++ b/src/microsoft/compiler/dxil_nir.c
@@ -1130,220 +1130,3 @@ dxil_nir_lower_upcast_phis(nir_shader *shader, unsigned min_bit_size)
 
    return progress;
 }
-
-/* The following float-to-half conversion routines are based on the "half" library:
- * https://sourceforge.net/projects/half/
- *
- * half - IEEE 754-based half-precision floating-point library.
- *
- * Copyright (c) 2012-2019 Christian Rau <rauy at users.sourceforge.net>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
- * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
- * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Version 2.1.0
- */
-
-
-static bool
-lower_fp16_casts_filter(const nir_instr *instr, const void *data)
-{
-   if (instr->type == nir_instr_type_alu) {
-      nir_alu_instr *alu = nir_instr_as_alu(instr);
-      /* TODO: DXIL has instructions for f2f16_rtz. For CL, it's not precise enough
-       * due to denorm handling. If the f2f16 instruction has undef rounding mode,
-       * we could map that too, but for CL, f2f16 is implied to mean rtne.
-       */
-      switch (alu->op) {
-      case nir_op_f2f16:
-      case nir_op_f2f16_rtne:
-      case nir_op_f2f16_rtz:
-         return true;
-      default:
-         return false;
-      }
-   } else if (instr->type == nir_instr_type_intrinsic) {
-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-      return intrin->intrinsic == nir_intrinsic_convert_alu_types &&
-         nir_intrinsic_dest_type(intrin) == nir_type_float16;
-   }
-   return false;
-}
-
-static nir_ssa_def *
-half_rounded(nir_builder *b, nir_ssa_def *value, nir_ssa_def *guard, nir_ssa_def *sticky,
-             nir_ssa_def *sign, nir_rounding_mode mode)
-{
-   switch (mode) {
-   case nir_rounding_mode_rtne:
-      return nir_iadd(b, value, nir_iand(b, guard, nir_ior(b, sticky, value)));
-   case nir_rounding_mode_ru:
-      sign = nir_ushr(b, sign, nir_imm_int(b, 31));
-      return nir_iadd(b, value, nir_iand(b, nir_inot(b, sign),
-                                            nir_ior(b, guard, sticky)));
-   case nir_rounding_mode_rd:
-      sign = nir_ushr(b, sign, nir_imm_int(b, 31));
-      return nir_iadd(b, value, nir_iand(b, sign,
-                                            nir_ior(b, guard, sticky)));
-   default:
-      return value;
-   }
-}
-
-static nir_ssa_def *
-float_to_half_impl(nir_builder *b, nir_ssa_def *src, nir_rounding_mode mode)
-{
-   nir_ssa_def *f32infinity = nir_imm_int(b, 255 << 23);
-   nir_ssa_def *f16max = nir_imm_int(b, (127 + 16) << 23);
-   nir_ssa_def *sign = nir_iand(b, src, nir_imm_int(b, 0x80000000));
-   nir_ssa_def *one = nir_imm_int(b, 1);
-
-   nir_ssa_def *abs = nir_iand(b, src, nir_imm_int(b, 0x7FFFFFFF));
-   /* NaN or INF. For rtne, overflow also becomes INF, so combine the comparisons */
-   nir_push_if(b, nir_ige(b, abs, mode == nir_rounding_mode_rtne ? f16max : f32infinity));
-   nir_ssa_def *inf_nanfp16 = nir_bcsel(b,
-                                    nir_ilt(b, f32infinity, abs),
-                                    nir_imm_int(b, 0x7E00),
-                                    nir_imm_int(b, 0x7C00));
-   nir_push_else(b, NULL);
-
-   nir_ssa_def *overflowed_fp16 = NULL;
-   if (mode != nir_rounding_mode_rtne) {
-      /* Handle overflow */
-      nir_push_if(b, nir_ige(b, abs, f16max));
-      switch (mode) {
-      case nir_rounding_mode_rtz:
-         overflowed_fp16 = nir_imm_int(b, 0x7BFF);
-         break;
-      case nir_rounding_mode_ru:
-         /* Negative becomes max float, positive becomes inf */
-         overflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), nir_imm_int(b, 0x7BFF), nir_imm_int(b, 0x7C00));
-         break;
-      case nir_rounding_mode_rd:
-         /* Negative becomes inf, positive becomes max float */
-         overflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), nir_imm_int(b, 0x7C00), nir_imm_int(b, 0x7BFF));
-         break;
-      default: unreachable("Should've been handled already");
-      }
-      nir_push_else(b, NULL);
-   }
-
-   nir_push_if(b, nir_ige(b, abs, nir_imm_int(b, 113 << 23)));
-
-   /* FP16 will be normal */
-   nir_ssa_def *zero = nir_imm_int(b, 0);
-   nir_ssa_def *value = nir_ior(b,
-                                nir_ishl(b,
-                                         nir_isub(b,
-                                                  nir_ushr(b, abs, nir_imm_int(b, 23)),
-                                                  nir_imm_int(b, 112)),
-                                         nir_imm_int(b, 10)),
-                                nir_iand(b, nir_ushr(b, abs, nir_imm_int(b, 13)), nir_imm_int(b, 0x3FFF)));
-   nir_ssa_def *guard = nir_iand(b, nir_ushr(b, abs, nir_imm_int(b, 12)), one);
-   nir_ssa_def *sticky = nir_bcsel(b, nir_ine(b, nir_iand(b, abs, nir_imm_int(b, 0xFFF)), zero), one, zero);
-   nir_ssa_def *normal_fp16 = half_rounded(b, value, guard, sticky, sign, mode);
-
-   nir_push_else(b, NULL);
-   nir_push_if(b, nir_ige(b, abs, nir_imm_int(b, 102 << 23)));
-   
-   /* FP16 will be denormal */
-   nir_ssa_def *i = nir_isub(b, nir_imm_int(b, 125), nir_ushr(b, abs, nir_imm_int(b, 23)));
-   nir_ssa_def *masked = nir_ior(b, nir_iand(b, abs, nir_imm_int(b, 0x7FFFFF)), nir_imm_int(b, 0x800000));
-   value = nir_ushr(b, masked, nir_iadd(b, i, one));
-   guard = nir_iand(b, nir_ushr(b, masked, i), one);
-   sticky = nir_bcsel(b, nir_ine(b, nir_iand(b, masked, nir_isub(b, nir_ishl(b, one, i), one)), zero), one, zero);
-   nir_ssa_def *denormal_fp16 = half_rounded(b, value, guard, sticky, sign, mode);
-
-   nir_push_else(b, NULL);
-
-   /* Handle underflow. Nonzero values need to shift up or down for round-up or round-down */
-   nir_ssa_def *underflowed_fp16 = zero;
-   if (mode == nir_rounding_mode_ru ||
-       mode == nir_rounding_mode_rd) {
-      nir_push_if(b, nir_i2b1(b, abs));
-
-      if (mode == nir_rounding_mode_ru)
-         underflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), zero, one);
-      else
-         underflowed_fp16 = nir_bcsel(b, nir_i2b1(b, sign), one, zero);
-
-      nir_push_else(b, NULL);
-      nir_pop_if(b, NULL);
-      underflowed_fp16 = nir_if_phi(b, underflowed_fp16, zero);
-   }
-
-   nir_pop_if(b, NULL);
-   nir_ssa_def *underflowed_or_denorm_fp16 = nir_if_phi(b, denormal_fp16, underflowed_fp16);
-
-   nir_pop_if(b, NULL);
-   nir_ssa_def *finite_fp16 = nir_if_phi(b, normal_fp16, underflowed_or_denorm_fp16);
-
-   nir_ssa_def *finite_or_overflowed_fp16 = finite_fp16;
-   if (mode != nir_rounding_mode_rtne) {
-      nir_pop_if(b, NULL);
-      finite_or_overflowed_fp16 = nir_if_phi(b, overflowed_fp16, finite_fp16);
-   }
-
-   nir_pop_if(b, NULL);
-   nir_ssa_def *fp16 = nir_if_phi(b, inf_nanfp16, finite_or_overflowed_fp16);
-
-   return nir_u2u16(b, nir_ior(b, fp16, nir_ushr(b, sign, nir_imm_int(b, 16))));
-}
-
-static nir_ssa_def *
-lower_fp16_cast_impl(nir_builder *b, nir_instr *instr, void *data)
-{
-   nir_ssa_def *src, *dst;
-   uint8_t *swizzle = NULL;
-   nir_rounding_mode mode = nir_rounding_mode_rtne;
-
-   if (instr->type == nir_instr_type_alu) {
-      nir_alu_instr *alu = nir_instr_as_alu(instr);
-      src = alu->src[0].src.ssa;
-      swizzle = alu->src[0].swizzle;
-      dst = &alu->dest.dest.ssa;
-      assert(src->bit_size == 32);
-      switch (alu->op) {
-      case nir_op_f2f16:
-      case nir_op_f2f16_rtne:
-         break;
-      case nir_op_f2f16_rtz:
-         mode = nir_rounding_mode_rtz;
-         break;
-      default: unreachable("Should've been filtered");
-      }
-   } else {
-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-      assert(nir_intrinsic_src_type(intrin) == nir_type_float32);
-      src = intrin->src[0].ssa;
-      dst = &intrin->dest.ssa;
-      mode = nir_intrinsic_rounding_mode(intrin);
-   }
-
-   nir_ssa_def *rets[NIR_MAX_VEC_COMPONENTS] = { NULL };
-
-   for (unsigned i = 0; i < dst->num_components; i++) {
-      nir_ssa_def *comp = nir_channel(b, src, swizzle ? swizzle[i] : i);
-      rets[i] = float_to_half_impl(b, comp, mode);
-   }
-
-   return nir_vec(b, rets, dst->num_components);
-}
-
-bool
-dxil_nir_lower_fp16_casts(nir_shader *shader)
-{
-   return nir_shader_lower_instructions(shader,
-                                        lower_fp16_casts_filter,
-                                        lower_fp16_cast_impl,
-                                        NULL);
-}



More information about the mesa-commit mailing list