[Mesa-dev] [PATCH 1/2] tgsi: add PK2H/UP2H support

Mon Feb 1 15:36:01 UTC 2016

On 01/30/2016 07:08 PM, sroland at vmware.com wrote:
> From: Ilia Mirkin <imirkin at alum.mit.edu>
>
> The util functions handle the half-float conversion.
> Note that piglit won't like it much due to:
> a) The util functions use magic float mul conversion but when run inside
> softpipe/llvmpipe, denorms are flushed to zero, therefore when the conversion
> is from/to f16 denorm the result will be zero. This is a bug which should be
> fixed in these functions (should not rely on denorms being available), but
> will happen elsewhere just the same (e.g. conversion to f16 render targets).
> b) The util functions use trunc round mode rather than round-to-nearest. This
> is NOT a bug (as it is a d3d10 requirement). This will result in rounding
> non-representable finite values to MAX_F16 rather than INFINITY. My belief is the
> piglit tests are wrong here but it's difficult to tell (generally glsl
> rounding mode is undefined, however I'm not sure if rounding mode might need
> to be consistent for different operations). Nevertheless, for gl it would be
> better to use round-to-nearest, but using different rounding for GL and d3d10
> is an unsolved problem (as it affects things like conversion to f16 render
> targets, clear colors, this shader opcode).
> Hence for now don't enable the cap bit (so the code is unused).
> (Code is from imirkin, comment from sroland)

Minor code nit-picks below.

Otherwise,
Reviewed-by: Brian Paul <brianp at vmware.com>

>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> Reviewed-by: Roland Scheidegger <sroland at vmware.com>
> ---
>   src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 ++++++++++++++++++++++++++++++++--
>   src/gallium/auxiliary/util/u_half.h    |  7 +++++-
>   2 files changed, 48 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index f67c162..12a477b 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -58,6 +58,7 @@
>   #include "tgsi/tgsi_parse.h"
>   #include "tgsi/tgsi_util.h"
>   #include "tgsi_exec.h"
> +#include "util/u_half.h"
>   #include "util/u_memory.h"
>   #include "util/u_math.h"
>
> @@ -3058,6 +3059,45 @@ exec_dp2(struct tgsi_exec_machine *mach,
>   }
>
>   static void
> +exec_pk2h(struct tgsi_exec_machine *mach,
> +          const struct tgsi_full_instruction *inst)
> +{
> +   unsigned int chan;

Just "unsigned"

> +   union tgsi_exec_channel arg[2], dst;
> +
> +   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
> +   fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
> +   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
> +      dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
> +         (util_float_to_half(arg[1].f[chan]) << 16);
> +   }
> +   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
> +      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
> +         store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
> +      }
> +   }
> +}
> +
> +static void
> +exec_up2h(struct tgsi_exec_machine *mach,
> +          const struct tgsi_full_instruction *inst)
> +{
> +   unsigned int chan;

Again, just "unsigned".

> +   union tgsi_exec_channel arg, dst[2];
> +
> +   fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
> +   for (chan = 0; chan < 4; chan++) {

s/4/TGSI_NUM_CHANNELS/ -- or rather TGSI_QUAD_SIZE, to match the equivalent per-lane loop in exec_pk2h above.

> +      dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
> +      dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
> +   }
> +   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
> +      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
> +         store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
> +      }
> +   }
> +}
> +
> +static void
>   exec_scs(struct tgsi_exec_machine *mach,
>            const struct tgsi_full_instruction *inst)
>   {
> @@ -4339,7 +4379,7 @@ exec_instruction(
>         break;
>
>      case TGSI_OPCODE_PK2H:
> -      assert (0);
> +      exec_pk2h(mach, inst);
>         break;
>
>      case TGSI_OPCODE_PK2US:
> @@ -4425,7 +4465,7 @@ exec_instruction(
>         break;
>
>      case TGSI_OPCODE_UP2H:
> -      assert (0);
> +      exec_up2h(mach, inst);
>         break;
>
>      case TGSI_OPCODE_UP2US:
> diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
> index d28fae3..966d213 100644
> --- a/src/gallium/auxiliary/util/u_half.h
> +++ b/src/gallium/auxiliary/util/u_half.h
> @@ -74,7 +74,11 @@ util_float_to_half(float f)
>         f32.ui &= round_mask;
>         f32.f  *= magic.f;
>         f32.ui -= round_mask;
> -
> +      /*
> +       * XXX: The magic mul relies on denorms being available, otherwise
> +       * all f16 denorms get flushed to zero - hence when this is used
> +       * for tgsi_exec in softpipe we won't get f16 denorms.
> +       */
>         /*
>          * Clamp to max finite value if overflowed.
>          * OpenGL has completely undefined rounding behavior for float to
> @@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16)
>
>      /* Adjust */
>      f32.f *= magic.f;
> +   /* XXX: The magic mul relies on denorms being available */
>
>      /* Inf / NaN */
>      if (f32.f >= infnan.f)
>