[Mesa-dev] [PATCH 3/6] glsl: move half<->float conversion to util

Sat Oct 10 12:09:11 PDT 2015

On Sat, Oct 10, 2015 at 11:47 AM, Rob Clark <robdclark at gmail.com> wrote:
> From: Rob Clark <robclark at freedesktop.org>
>
> Needed in NIR too, so move out of mesa/main/imports.c
>
> Signed-off-by: Rob Clark <robclark at freedesktop.org>
> ---
>  src/glsl/Makefile.am      |   1 +
>  src/mesa/main/imports.c   | 148 --------------------------------------
>  src/mesa/main/imports.h   |  38 ++++++++--
>  src/util/Makefile.sources |   2 +
>  src/util/convert.c        | 179 ++++++++++++++++++++++++++++++++++++++++++++++
>  src/util/convert.h        |  43 +++++++++++
>  6 files changed, 259 insertions(+), 152 deletions(-)
>  create mode 100644 src/util/convert.c
>  create mode 100644 src/util/convert.h
>
> diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
> index 3265391..347919b 100644
> --- a/src/glsl/Makefile.am
> +++ b/src/glsl/Makefile.am
> @@ -160,6 +160,7 @@ glsl_compiler_SOURCES = \
>  glsl_compiler_LDADD =                                  \
>         libglsl.la                                      \
>         $(top_builddir)/src/libglsl_util.la             \
> +       $(top_builddir)/src/util/libmesautil.la         \
>         $(PTHREAD_LIBS)
>
>  glsl_test_SOURCES = \
> diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
> index 350e675..230ebbc 100644
> --- a/src/mesa/main/imports.c
> +++ b/src/mesa/main/imports.c
> @@ -307,154 +307,6 @@ _mesa_bitcount_64(uint64_t n)
>  }
>  #endif
>
> -
> -/**
> - * Convert a 4-byte float to a 2-byte half float.
> - *
> - * Not all float32 values can be represented exactly as a float16 value. We
> - * round such intermediate float32 values to the nearest float16. When the
> - * float32 lies exactly between to float16 values, we round to the one with
> - * an even mantissa.
> - *
> - * This rounding behavior has several benefits:
> - *   - It has no sign bias.
> - *
> - *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
> - *     GPU ISA.
> - *
> - *   - By reproducing the behavior of the GPU (at least on Intel hardware),
> - *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
> - *     result in the same value as if the expression were executed on the GPU.
> - */
> -GLhalfARB
> -_mesa_float_to_half(float val)
> -{
> -   const fi_type fi = {val};
> -   const int flt_m = fi.i & 0x7fffff;
> -   const int flt_e = (fi.i >> 23) & 0xff;
> -   const int flt_s = (fi.i >> 31) & 0x1;
> -   int s, e, m = 0;
> -   GLhalfARB result;
> -
> -   /* sign bit */
> -   s = flt_s;
> -
> -   /* handle special cases */
> -   if ((flt_e == 0) && (flt_m == 0)) {
> -      /* zero */
> -      /* m = 0; - already set */
> -      e = 0;
> -   }
> -   else if ((flt_e == 0) && (flt_m != 0)) {
> -      /* denorm -- denorm float maps to 0 half */
> -      /* m = 0; - already set */
> -      e = 0;
> -   }
> -   else if ((flt_e == 0xff) && (flt_m == 0)) {
> -      /* infinity */
> -      /* m = 0; - already set */
> -      e = 31;
> -   }
> -   else if ((flt_e == 0xff) && (flt_m != 0)) {
> -      /* NaN */
> -      m = 1;
> -      e = 31;
> -   }
> -   else {
> -      /* regular number */
> -      const int new_exp = flt_e - 127;
> -      if (new_exp < -14) {
> -         /* The float32 lies in the range (0.0, min_normal16) and is rounded
> -          * to a nearby float16 value. The result will be either zero, subnormal,
> -          * or normal.
> -          */
> -         e = 0;
> -         m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
> -      }
> -      else if (new_exp > 15) {
> -         /* map this value to infinity */
> -         /* m = 0; - already set */
> -         e = 31;
> -      }
> -      else {
> -         /* The float32 lies in the range
> -          *   [min_normal16, max_normal16 + max_step16)
> -          * and is rounded to a nearby float16 value. The result will be
> -          * either normal or infinite.
> -          */
> -         e = new_exp + 15;
> -         m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
> -      }
> -   }
> -
> -   assert(0 <= m && m <= 1024);
> -   if (m == 1024) {
> -      /* The float32 was rounded upwards into the range of the next exponent,
> -       * so bump the exponent. This correctly handles the case where f32
> -       * should be rounded up to float16 infinity.
> -       */
> -      ++e;
> -      m = 0;
> -   }
> -
> -   result = (s << 15) | (e << 10) | m;
> -   return result;
> -}
> -
> -
> -/**
> - * Convert a 2-byte half float to a 4-byte float.
> - * Based on code from:
> - * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
> - */
> -float
> -_mesa_half_to_float(GLhalfARB val)
> -{
> -   /* XXX could also use a 64K-entry lookup table */
> -   const int m = val & 0x3ff;
> -   const int e = (val >> 10) & 0x1f;
> -   const int s = (val >> 15) & 0x1;
> -   int flt_m, flt_e, flt_s;
> -   fi_type fi;
> -   float result;
> -
> -   /* sign bit */
> -   flt_s = s;
> -
> -   /* handle special cases */
> -   if ((e == 0) && (m == 0)) {
> -      /* zero */
> -      flt_m = 0;
> -      flt_e = 0;
> -   }
> -   else if ((e == 0) && (m != 0)) {
> -      /* denorm -- denorm half will fit in non-denorm single */
> -      const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
> -      float mantissa = ((float) (m)) / 1024.0f;
> -      float sign = s ? -1.0f : 1.0f;
> -      return sign * mantissa * half_denorm;
> -   }
> -   else if ((e == 31) && (m == 0)) {
> -      /* infinity */
> -      flt_e = 0xff;
> -      flt_m = 0;
> -   }
> -   else if ((e == 31) && (m != 0)) {
> -      /* NaN */
> -      flt_e = 0xff;
> -      flt_m = 1;
> -   }
> -   else {
> -      /* regular */
> -      flt_e = e + 112;
> -      flt_m = m << 13;
> -   }
> -
> -   fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
> -   result = fi.f;
> -   return result;
> -}
> -
>  /*@}*/
>
>
> diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
> index 9024758..3a09304 100644
> --- a/src/mesa/main/imports.h
> +++ b/src/mesa/main/imports.h
> @@ -42,6 +42,7 @@
>  #include "compiler.h"
>  #include "glheader.h"
>  #include "errors.h"
> +#include "util/convert.h"
>
>  #ifdef __cplusplus
>  extern "C" {
> @@ -396,12 +397,41 @@ _mesa_flsll(uint64_t n)
>  #endif
>  }
>
> +/**
> + * Convert a 4-byte float to a 2-byte half float.
> + *
> + * Not all float32 values can be represented exactly as a float16 value. We
> + * round such intermediate float32 values to the nearest float16. When the
> + * float32 lies exactly between to float16 values, we round to the one with
> + * an even mantissa.
> + *
> + * This rounding behavior has several benefits:
> + *   - It has no sign bias.
> + *
> + *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
> + *     GPU ISA.
> + *
> + *   - By reproducing the behavior of the GPU (at least on Intel hardware),
> + *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
> + *     result in the same value as if the expression were executed on the GPU.
> + */
> +static inline GLhalfARB
> +_mesa_float_to_half(float val)
> +{
> +   return float_to_half(val);
> +}
>
> -extern GLhalfARB
> -_mesa_float_to_half(float f);
>
> -extern float
> -_mesa_half_to_float(GLhalfARB h);
> +/**
> + * Convert a 2-byte half float to a 4-byte float.
> + * Based on code from:
> + * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
> + */
> +static inline float
> +_mesa_half_to_float(GLhalfARB val)
> +{
> +   return half_to_float(val);
> +}
>
>  static inline bool
>  _mesa_half_is_negative(GLhalfARB h)
> diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
> index e45431d..71b2a25 100644
> --- a/src/util/Makefile.sources
> +++ b/src/util/Makefile.sources
> @@ -1,5 +1,7 @@
>  MESA_UTIL_FILES :=     \
>         bitset.h \
> +       convert.c \
> +       convert.h \
>         debug.c \
>         debug.h \
>         format_srgb.h \
> diff --git a/src/util/convert.c b/src/util/convert.c
> new file mode 100644
> index 0000000..e86e322
> --- /dev/null
> +++ b/src/util/convert.c

half-float.c seems like a better name.

> @@ -0,0 +1,179 @@
> +/*
> + * Copyright © 2015 Red Hat

What the?

No. This code was written by Brian (commit 7eb3e9b) and Chad (commit
529b6d1), so its copyright belongs to VMware and Intel (and probably
"gking" from [1]), not Red Hat.

[1] https://web.archive.org/web/20030303115125/http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Rob Clark <robclark at freedesktop.org>

We're trying to stop with author lists generally, but listing yourself
as an author on some code you copied from somewhere else is not okay.

> + */
> +
> +#include <math.h>
> +#include <assert.h>
> +#include "convert.h"
> +#include "rounding.h"
> +
> +typedef union { float f; int32_t i; uint32_t u; } fi_type;
> +
> +/**
> + * Convert a 4-byte float to a 2-byte half float.
> + *
> + * Not all float32 values can be represented exactly as a float16 value. We
> + * round such intermediate float32 values to the nearest float16. When the
> + * float32 lies exactly between to float16 values, we round to the one with
> + * an even mantissa.
> + *
> + * This rounding behavior has several benefits:
> + *   - It has no sign bias.
> + *
> + *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
> + *     GPU ISA.
> + *
> + *   - By reproducing the behavior of the GPU (at least on Intel hardware),
> + *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
> + *     result in the same value as if the expression were executed on the GPU.
> + */
> +uint16_t
> +float_to_half(float val)
> +{
> +   const fi_type fi = {val};
> +   const int flt_m = fi.i & 0x7fffff;
> +   const int flt_e = (fi.i >> 23) & 0xff;
> +   const int flt_s = (fi.i >> 31) & 0x1;
> +   int s, e, m = 0;
> +   uint16_t result;
> +
> +   /* sign bit */
> +   s = flt_s;
> +
> +   /* handle special cases */
> +   if ((flt_e == 0) && (flt_m == 0)) {
> +      /* zero */
> +      /* m = 0; - already set */
> +      e = 0;
> +   }
> +   else if ((flt_e == 0) && (flt_m != 0)) {
> +      /* denorm -- denorm float maps to 0 half */
> +      /* m = 0; - already set */
> +      e = 0;
> +   }
> +   else if ((flt_e == 0xff) && (flt_m == 0)) {
> +      /* infinity */
> +      /* m = 0; - already set */
> +      e = 31;
> +   }
> +   else if ((flt_e == 0xff) && (flt_m != 0)) {
> +      /* NaN */
> +      m = 1;
> +      e = 31;
> +   }
> +   else {
> +      /* regular number */
> +      const int new_exp = flt_e - 127;
> +      if (new_exp < -14) {
> +         /* The float32 lies in the range (0.0, min_normal16) and is rounded
> +          * to a nearby float16 value. The result will be either zero, subnormal,
> +          * or normal.
> +          */
> +         e = 0;
> +         m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
> +      }
> +      else if (new_exp > 15) {
> +         /* map this value to infinity */
> +         /* m = 0; - already set */
> +         e = 31;
> +      }
> +      else {
> +         /* The float32 lies in the range
> +          *   [min_normal16, max_normal16 + max_step16)
> +          * and is rounded to a nearby float16 value. The result will be
> +          * either normal or infinite.
> +          */
> +         e = new_exp + 15;
> +         m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
> +      }
> +   }
> +
> +   assert(0 <= m && m <= 1024);
> +   if (m == 1024) {
> +      /* The float32 was rounded upwards into the range of the next exponent,
> +       * so bump the exponent. This correctly handles the case where f32
> +       * should be rounded up to float16 infinity.
> +       */
> +      ++e;
> +      m = 0;
> +   }
> +
> +   result = (s << 15) | (e << 10) | m;
> +   return result;
> +}
> +
> +
> +/**
> + * Convert a 2-byte half float to a 4-byte float.
> + * Based on code from:
> + * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
> + */
> +float
> +half_to_float(uint16_t val)
> +{
> +   /* XXX could also use a 64K-entry lookup table */
> +   const int m = val & 0x3ff;
> +   const int e = (val >> 10) & 0x1f;
> +   const int s = (val >> 15) & 0x1;
> +   int flt_m, flt_e, flt_s;
> +   fi_type fi;
> +   float result;
> +
> +   /* sign bit */
> +   flt_s = s;
> +
> +   /* handle special cases */
> +   if ((e == 0) && (m == 0)) {
> +      /* zero */
> +      flt_m = 0;
> +      flt_e = 0;
> +   }
> +   else if ((e == 0) && (m != 0)) {
> +      /* denorm -- denorm half will fit in non-denorm single */
> +      const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
> +      float mantissa = ((float) (m)) / 1024.0f;
> +      float sign = s ? -1.0f : 1.0f;
> +      return sign * mantissa * half_denorm;
> +   }
> +   else if ((e == 31) && (m == 0)) {
> +      /* infinity */
> +      flt_e = 0xff;
> +      flt_m = 0;
> +   }
> +   else if ((e == 31) && (m != 0)) {
> +      /* NaN */
> +      flt_e = 0xff;
> +      flt_m = 1;
> +   }
> +   else {
> +      /* regular */
> +      flt_e = e + 112;
> +      flt_m = m << 13;
> +   }
> +
> +   fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
> +   result = fi.f;
> +   return result;
> +}
> diff --git a/src/util/convert.h b/src/util/convert.h
> new file mode 100644
> index 0000000..30d36a1
> --- /dev/null
> +++ b/src/util/convert.h
> @@ -0,0 +1,43 @@
> +/*
> + * Copyright © 2015 Red Hat

Seems fishy -- same copyright concern as for convert.c.

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Rob Clark <robclark at freedesktop.org>

Again, no author list.

> + */
> +
> +#ifndef _MATH_H_
> +#define _MATH_H_

Wrong include guard name -- this header is convert.h, not math.h.

> +
> +#include <stdint.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +uint16_t float_to_half(float val);
> +float half_to_float(uint16_t val);

I think these functions need a prefix -- util_* or similar -- or they
should just stay as _mesa_*.
> +
> +#ifdef __cplusplus
> +} /* extern C */
> +#endif
> +
> +#endif /* _MATH_H_ */