[Mesa-dev] [PATCH 3/6] glsl: move half<->float conversion to util

Sat Oct 10 12:29:40 PDT 2015

On Sat, Oct 10, 2015 at 3:09 PM, Matt Turner <mattst88 at gmail.com> wrote:
> On Sat, Oct 10, 2015 at 11:47 AM, Rob Clark <robdclark at gmail.com> wrote:
>> From: Rob Clark <robclark at freedesktop.org>
>>
>> Needed in NIR too, so move out of mesa/main/imports.c
>>
>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>> ---
>>  src/glsl/Makefile.am      |   1 +
>>  src/mesa/main/imports.c   | 148 --------------------------------------
>>  src/mesa/main/imports.h   |  38 ++++++++--
>>  src/util/Makefile.sources |   2 +
>>  src/util/convert.c        | 179 ++++++++++++++++++++++++++++++++++++++++++++++
>>  src/util/convert.h        |  43 +++++++++++
>>  6 files changed, 259 insertions(+), 152 deletions(-)
>>  create mode 100644 src/util/convert.c
>>  create mode 100644 src/util/convert.h
>>
>> diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
>> index 3265391..347919b 100644
>> --- a/src/glsl/Makefile.am
>> +++ b/src/glsl/Makefile.am
>> @@ -160,6 +160,7 @@ glsl_compiler_SOURCES = \
>>  glsl_compiler_LDADD =                                  \
>>         libglsl.la                                      \
>>         $(top_builddir)/src/libglsl_util.la             \
>> +       $(top_builddir)/src/util/libmesautil.la         \
>>         $(PTHREAD_LIBS)
>>
>>  glsl_test_SOURCES = \
>> diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
>> index 350e675..230ebbc 100644
>> --- a/src/mesa/main/imports.c
>> +++ b/src/mesa/main/imports.c
>> @@ -307,154 +307,6 @@ _mesa_bitcount_64(uint64_t n)
>>  }
>>  #endif
>>
>> -
>> -/**
>> - * Convert a 4-byte float to a 2-byte half float.
>> - *
>> - * Not all float32 values can be represented exactly as a float16 value. We
>> - * round such intermediate float32 values to the nearest float16. When the
>> - * float32 lies exactly between to float16 values, we round to the one with
>> - * an even mantissa.
>> - *
>> - * This rounding behavior has several benefits:
>> - *   - It has no sign bias.
>> - *
>> - *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
>> - *     GPU ISA.
>> - *
>> - *   - By reproducing the behavior of the GPU (at least on Intel hardware),
>> - *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
>> - *     result in the same value as if the expression were executed on the GPU.
>> - */
>> -GLhalfARB
>> -_mesa_float_to_half(float val)
>> -{
>> -   const fi_type fi = {val};
>> -   const int flt_m = fi.i & 0x7fffff;
>> -   const int flt_e = (fi.i >> 23) & 0xff;
>> -   const int flt_s = (fi.i >> 31) & 0x1;
>> -   int s, e, m = 0;
>> -   GLhalfARB result;
>> -
>> -   /* sign bit */
>> -   s = flt_s;
>> -
>> -   /* handle special cases */
>> -   if ((flt_e == 0) && (flt_m == 0)) {
>> -      /* zero */
>> -      /* m = 0; - already set */
>> -      e = 0;
>> -   }
>> -   else if ((flt_e == 0) && (flt_m != 0)) {
>> -      /* denorm -- denorm float maps to 0 half */
>> -      /* m = 0; - already set */
>> -      e = 0;
>> -   }
>> -   else if ((flt_e == 0xff) && (flt_m == 0)) {
>> -      /* infinity */
>> -      /* m = 0; - already set */
>> -      e = 31;
>> -   }
>> -   else if ((flt_e == 0xff) && (flt_m != 0)) {
>> -      /* NaN */
>> -      m = 1;
>> -      e = 31;
>> -   }
>> -   else {
>> -      /* regular number */
>> -      const int new_exp = flt_e - 127;
>> -      if (new_exp < -14) {
>> -         /* The float32 lies in the range (0.0, min_normal16) and is rounded
>> -          * to a nearby float16 value. The result will be either zero, subnormal,
>> -          * or normal.
>> -          */
>> -         e = 0;
>> -         m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
>> -      }
>> -      else if (new_exp > 15) {
>> -         /* map this value to infinity */
>> -         /* m = 0; - already set */
>> -         e = 31;
>> -      }
>> -      else {
>> -         /* The float32 lies in the range
>> -          *   [min_normal16, max_normal16 + max_step16)
>> -          * and is rounded to a nearby float16 value. The result will be
>> -          * either normal or infinite.
>> -          */
>> -         e = new_exp + 15;
>> -         m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
>> -      }
>> -   }
>> -
>> -   assert(0 <= m && m <= 1024);
>> -   if (m == 1024) {
>> -      /* The float32 was rounded upwards into the range of the next exponent,
>> -       * so bump the exponent. This correctly handles the case where f32
>> -       * should be rounded up to float16 infinity.
>> -       */
>> -      ++e;
>> -      m = 0;
>> -   }
>> -
>> -   result = (s << 15) | (e << 10) | m;
>> -   return result;
>> -}
>> -
>> -
>> -/**
>> - * Convert a 2-byte half float to a 4-byte float.
>> - * Based on code from:
>> - * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
>> - */
>> -float
>> -_mesa_half_to_float(GLhalfARB val)
>> -{
>> -   /* XXX could also use a 64K-entry lookup table */
>> -   const int m = val & 0x3ff;
>> -   const int e = (val >> 10) & 0x1f;
>> -   const int s = (val >> 15) & 0x1;
>> -   int flt_m, flt_e, flt_s;
>> -   fi_type fi;
>> -   float result;
>> -
>> -   /* sign bit */
>> -   flt_s = s;
>> -
>> -   /* handle special cases */
>> -   if ((e == 0) && (m == 0)) {
>> -      /* zero */
>> -      flt_m = 0;
>> -      flt_e = 0;
>> -   }
>> -   else if ((e == 0) && (m != 0)) {
>> -      /* denorm -- denorm half will fit in non-denorm single */
>> -      const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
>> -      float mantissa = ((float) (m)) / 1024.0f;
>> -      float sign = s ? -1.0f : 1.0f;
>> -      return sign * mantissa * half_denorm;
>> -   }
>> -   else if ((e == 31) && (m == 0)) {
>> -      /* infinity */
>> -      flt_e = 0xff;
>> -      flt_m = 0;
>> -   }
>> -   else if ((e == 31) && (m != 0)) {
>> -      /* NaN */
>> -      flt_e = 0xff;
>> -      flt_m = 1;
>> -   }
>> -   else {
>> -      /* regular */
>> -      flt_e = e + 112;
>> -      flt_m = m << 13;
>> -   }
>> -
>> -   fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
>> -   result = fi.f;
>> -   return result;
>> -}
>> -
>>  /*@}*/
>>
>>
>> diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
>> index 9024758..3a09304 100644
>> --- a/src/mesa/main/imports.h
>> +++ b/src/mesa/main/imports.h
>> @@ -42,6 +42,7 @@
>>  #include "compiler.h"
>>  #include "glheader.h"
>>  #include "errors.h"
>> +#include "util/convert.h"
>>
>>  #ifdef __cplusplus
>>  extern "C" {
>> @@ -396,12 +397,41 @@ _mesa_flsll(uint64_t n)
>>  #endif
>>  }
>>
>> +/**
>> + * Convert a 4-byte float to a 2-byte half float.
>> + *
>> + * Not all float32 values can be represented exactly as a float16 value. We
>> + * round such intermediate float32 values to the nearest float16. When the
>> + * float32 lies exactly between to float16 values, we round to the one with
>> + * an even mantissa.
>> + *
>> + * This rounding behavior has several benefits:
>> + *   - It has no sign bias.
>> + *
>> + *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
>> + *     GPU ISA.
>> + *
>> + *   - By reproducing the behavior of the GPU (at least on Intel hardware),
>> + *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
>> + *     result in the same value as if the expression were executed on the GPU.
>> + */
>> +static inline GLhalfARB
>> +_mesa_float_to_half(float val)
>> +{
>> +   return float_to_half(val);
>> +}
>>
>> -extern GLhalfARB
>> -_mesa_float_to_half(float f);
>>
>> -extern float
>> -_mesa_half_to_float(GLhalfARB h);
>> +/**
>> + * Convert a 2-byte half float to a 4-byte float.
>> + * Based on code from:
>> + * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
>> + */
>> +static inline float
>> +_mesa_half_to_float(GLhalfARB val)
>> +{
>> +   return half_to_float(val);
>> +}
>>
>>  static inline bool
>>  _mesa_half_is_negative(GLhalfARB h)
>> diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
>> index e45431d..71b2a25 100644
>> --- a/src/util/Makefile.sources
>> +++ b/src/util/Makefile.sources
>> @@ -1,5 +1,7 @@
>>  MESA_UTIL_FILES :=     \
>>         bitset.h \
>> +       convert.c \
>> +       convert.h \
>>         debug.c \
>>         debug.h \
>>         format_srgb.h \
>> diff --git a/src/util/convert.c b/src/util/convert.c
>> new file mode 100644
>> index 0000000..e86e322
>> --- /dev/null
>> +++ b/src/util/convert.c
>
> half-float.c seems like a better name.

I think I started w/ math.c/h until I realized that conflicted.. just
picked convert.c/h since that didn't seem to conflict with anything,
but not stuck on the name so I can change it

>> @@ -0,0 +1,179 @@
>> +/*
>> + * Copyright © 2015 Red Hat
>
> What the?
>
> No. This code was written by Brian (commit 7eb3e9b) and Chad (commit
> 529b6d1). Its copyright belongs to VMware and Intel (and probably
> "gking" from [1])
>
> [1] https://web.archive.org/web/20030303115125/http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html

was just generic boilerplate I have for new files.. didn't mean to
step on any toes, I can change it to something else if you let me know
what is preferred..

>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Rob Clark <robclark at freedesktop.org>
>
> We're trying to stop with author lists generally, but listing yourself
> as an author on some code you copied from somewhere else is not okay.

sorry, was just boilerplate.. no objection to changing it to something else

>> + */
>> +
>> +#include <math.h>
>> +#include <assert.h>
>> +#include "convert.h"
>> +#include "rounding.h"
>> +
>> +typedef union { float f; int32_t i; uint32_t u; } fi_type;
>> +
>> +/**
>> + * Convert a 4-byte float to a 2-byte half float.
>> + *
>> + * Not all float32 values can be represented exactly as a float16 value. We
>> + * round such intermediate float32 values to the nearest float16. When the
>> + * float32 lies exactly between to float16 values, we round to the one with
>> + * an even mantissa.
>> + *
>> + * This rounding behavior has several benefits:
>> + *   - It has no sign bias.
>> + *
>> + *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
>> + *     GPU ISA.
>> + *
>> + *   - By reproducing the behavior of the GPU (at least on Intel hardware),
>> + *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
>> + *     result in the same value as if the expression were executed on the GPU.
>> + */
>> +uint16_t
>> +float_to_half(float val)
>> +{
>> +   const fi_type fi = {val};
>> +   const int flt_m = fi.i & 0x7fffff;
>> +   const int flt_e = (fi.i >> 23) & 0xff;
>> +   const int flt_s = (fi.i >> 31) & 0x1;
>> +   int s, e, m = 0;
>> +   uint16_t result;
>> +
>> +   /* sign bit */
>> +   s = flt_s;
>> +
>> +   /* handle special cases */
>> +   if ((flt_e == 0) && (flt_m == 0)) {
>> +      /* zero */
>> +      /* m = 0; - already set */
>> +      e = 0;
>> +   }
>> +   else if ((flt_e == 0) && (flt_m != 0)) {
>> +      /* denorm -- denorm float maps to 0 half */
>> +      /* m = 0; - already set */
>> +      e = 0;
>> +   }
>> +   else if ((flt_e == 0xff) && (flt_m == 0)) {
>> +      /* infinity */
>> +      /* m = 0; - already set */
>> +      e = 31;
>> +   }
>> +   else if ((flt_e == 0xff) && (flt_m != 0)) {
>> +      /* NaN */
>> +      m = 1;
>> +      e = 31;
>> +   }
>> +   else {
>> +      /* regular number */
>> +      const int new_exp = flt_e - 127;
>> +      if (new_exp < -14) {
>> +         /* The float32 lies in the range (0.0, min_normal16) and is rounded
>> +          * to a nearby float16 value. The result will be either zero, subnormal,
>> +          * or normal.
>> +          */
>> +         e = 0;
>> +         m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
>> +      }
>> +      else if (new_exp > 15) {
>> +         /* map this value to infinity */
>> +         /* m = 0; - already set */
>> +         e = 31;
>> +      }
>> +      else {
>> +         /* The float32 lies in the range
>> +          *   [min_normal16, max_normal16 + max_step16)
>> +          * and is rounded to a nearby float16 value. The result will be
>> +          * either normal or infinite.
>> +          */
>> +         e = new_exp + 15;
>> +         m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
>> +      }
>> +   }
>> +
>> +   assert(0 <= m && m <= 1024);
>> +   if (m == 1024) {
>> +      /* The float32 was rounded upwards into the range of the next exponent,
>> +       * so bump the exponent. This correctly handles the case where f32
>> +       * should be rounded up to float16 infinity.
>> +       */
>> +      ++e;
>> +      m = 0;
>> +   }
>> +
>> +   result = (s << 15) | (e << 10) | m;
>> +   return result;
>> +}
>> +
>> +
>> +/**
>> + * Convert a 2-byte half float to a 4-byte float.
>> + * Based on code from:
>> + * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
>> + */
>> +float
>> +half_to_float(uint16_t val)
>> +{
>> +   /* XXX could also use a 64K-entry lookup table */
>> +   const int m = val & 0x3ff;
>> +   const int e = (val >> 10) & 0x1f;
>> +   const int s = (val >> 15) & 0x1;
>> +   int flt_m, flt_e, flt_s;
>> +   fi_type fi;
>> +   float result;
>> +
>> +   /* sign bit */
>> +   flt_s = s;
>> +
>> +   /* handle special cases */
>> +   if ((e == 0) && (m == 0)) {
>> +      /* zero */
>> +      flt_m = 0;
>> +      flt_e = 0;
>> +   }
>> +   else if ((e == 0) && (m != 0)) {
>> +      /* denorm -- denorm half will fit in non-denorm single */
>> +      const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
>> +      float mantissa = ((float) (m)) / 1024.0f;
>> +      float sign = s ? -1.0f : 1.0f;
>> +      return sign * mantissa * half_denorm;
>> +   }
>> +   else if ((e == 31) && (m == 0)) {
>> +      /* infinity */
>> +      flt_e = 0xff;
>> +      flt_m = 0;
>> +   }
>> +   else if ((e == 31) && (m != 0)) {
>> +      /* NaN */
>> +      flt_e = 0xff;
>> +      flt_m = 1;
>> +   }
>> +   else {
>> +      /* regular */
>> +      flt_e = e + 112;
>> +      flt_m = m << 13;
>> +   }
>> +
>> +   fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
>> +   result = fi.f;
>> +   return result;
>> +}
>> diff --git a/src/util/convert.h b/src/util/convert.h
>> new file mode 100644
>> index 0000000..30d36a1
>> --- /dev/null
>> +++ b/src/util/convert.h
>> @@ -0,0 +1,43 @@
>> +/*
>> + * Copyright © 2015 Red Hat
>
> Seems fishy.
>
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Rob Clark <robclark at freedesktop.org>
>
> Again, no author list.
>
>> + */
>> +
>> +#ifndef _MATH_H_
>> +#define _MATH_H_
>
> Wrong name.

sorry, just left over from my first attempt before I realized
util/math.h conflicted w/ <math.h>..

>> +
>> +#include <stdint.h>
>> +
>> +#ifdef __cplusplus
>> +extern "C" {
>> +#endif
>> +
>> +uint16_t float_to_half(float val);
>> +float half_to_float(uint16_t val);
>
> I think these functions need to be prefixed with something -- util_*
> or something or just leave them as _mesa_*.

util_xyz conflicted with gallium u_half stuff..

current choices were just something that didn't seem to conflict with
anything, so after bikeshedding is done let me know what is preferred
and I will go with that ;-)

the point was more just how to move things around to untangle the NIR
dependency on GLSL without completely pulling the yarn out of the
sweater..

BR,
-R

>> +
>> +#ifdef __cplusplus
>> +} /* extern C */
>> +#endif
>> +
>> +#endif /* _MATH_H_ */