[Mesa-dev] [PATCH 1/2] tgsi: add PK2H/UP2H support

Tue Feb 2 02:35:07 UTC 2016

Am 01.02.2016 um 16:36 schrieb Brian Paul:
> On 01/30/2016 07:08 PM, sroland at vmware.com wrote:
>> From: Ilia Mirkin <imirkin at alum.mit.edu>
>>
>> The util functions handle the half-float conversion.
>> Note that piglit won't like it much due to:
>> a) The util functions use magic float mul conversion but when run inside
>> softpipe/llvmpipe, denorms are flushed to zero, therefore when the
>> conversion
>> is from/to f16 denorm the result will be zero. This is a bug which
>> should be
>> fixed in these functions (should not rely on denorms being available),
>> but
>> will happen elsewhere just the same (e.g. conversion to f16 render
>> targets).
>> b) The util functions use trunc round mode rather than
>> round-to-nearest. This
>> is NOT a bug (as it is a d3d10 requirement). This will result of
>> rounding not
>> representable finite values to MAX_F16 rather than INFINITY. My belief
>> is the
>> piglit tests are wrong here but it's difficult to tell (generally glsl
>> rounding mode is undefined, however I'm not sure if rounding mode
>> might need
>> to be consistent for different operations). Nevertheless, for gl it
>> would be
>> better to use round-to-nearest, but using different rounding for GL
>> and d3d10
>> is an unsolved problem (as it affects things like conversion to f16
>> render
>> targets, clear colors, this shader opcode).
>> Hence for now don't enable the cap bit (so the code is unused).
>> (Code is from imirkin, comment from sroland)
> 
> Minor code nit-picks below.
> 
> Otherwise,
> Reviewed-by: Brian Paul <brianp at vmware.com>
> 
> 
>>
>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>> Reviewed-by: Roland Scheidegger <sroland at vmvware.com>
>> ---
>>   src/gallium/auxiliary/tgsi/tgsi_exec.c | 44
>> ++++++++++++++++++++++++++++++++--
>>   src/gallium/auxiliary/util/u_half.h    |  7 +++++-
>>   2 files changed, 48 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> index f67c162..12a477b 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
>> @@ -58,6 +58,7 @@
>>   #include "tgsi/tgsi_parse.h"
>>   #include "tgsi/tgsi_util.h"
>>   #include "tgsi_exec.h"
>> +#include "util/u_half.h"
>>   #include "util/u_memory.h"
>>   #include "util/u_math.h"
>>
>> @@ -3058,6 +3059,45 @@ exec_dp2(struct tgsi_exec_machine *mach,
>>   }
>>
>>   static void
>> +exec_pk2h(struct tgsi_exec_machine *mach,
>> +          const struct tgsi_full_instruction *inst)
>> +{
>> +   unsigned int chan;
> 
> Just "unsigned"
Ok fixed (fwiw all other exec functions in there use "unsigned int" for
some reason too).

> 
> 
>> +   union tgsi_exec_channel arg[2], dst;
>> +
>> +   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X,
>> TGSI_EXEC_DATA_FLOAT);
>> +   fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y,
>> TGSI_EXEC_DATA_FLOAT);
>> +   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
>> +      dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
>> +         (util_float_to_half(arg[1].f[chan]) << 16);
>> +   }
>> +   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
>> +      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
>> +         store_dest(mach, &dst, &inst->Dst[0], inst, chan,
>> TGSI_EXEC_DATA_UINT);
>> +      }
>> +   }
>> +}
>> +
>> +static void
>> +exec_up2h(struct tgsi_exec_machine *mach,
>> +          const struct tgsi_full_instruction *inst)
>> +{
>> +   unsigned int chan;
> 
> again.
> 
> 
>> +   union tgsi_exec_channel arg, dst[2];
>> +
>> +   fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X,
>> TGSI_EXEC_DATA_UINT);
>> +   for (chan = 0; chan < 4; chan++) {
> 
> s/4/TGSI_NUM_CHANNELS/
Looking at that, it's actually cycling through TGSI_QUAD_SIZE. Fixed.

> 
>> +      dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
>> +      dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
>> +   }
>> +   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
>> +      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
>> +         store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan,
>> TGSI_EXEC_DATA_FLOAT);
>> +      }
>> +   }
>> +}
>> +
>> +static void
>>   exec_scs(struct tgsi_exec_machine *mach,
>>            const struct tgsi_full_instruction *inst)
>>   {
>> @@ -4339,7 +4379,7 @@ exec_instruction(
>>         break;
>>
>>      case TGSI_OPCODE_PK2H:
>> -      assert (0);
>> +      exec_pk2h(mach, inst);
>>         break;
>>
>>      case TGSI_OPCODE_PK2US:
>> @@ -4425,7 +4465,7 @@ exec_instruction(
>>         break;
>>
>>      case TGSI_OPCODE_UP2H:
>> -      assert (0);
>> +      exec_up2h(mach, inst);
>>         break;
>>
>>      case TGSI_OPCODE_UP2US:
>> diff --git a/src/gallium/auxiliary/util/u_half.h
>> b/src/gallium/auxiliary/util/u_half.h
>> index d28fae3..966d213 100644
>> --- a/src/gallium/auxiliary/util/u_half.h
>> +++ b/src/gallium/auxiliary/util/u_half.h
>> @@ -74,7 +74,11 @@ util_float_to_half(float f)
>>         f32.ui &= round_mask;
>>         f32.f  *= magic.f;
>>         f32.ui -= round_mask;
>> -
>> +      /*
>> +       * XXX: The magic mul relies on denorms being available, otherwise
>> +       * all f16 denorms get flushed to zero - hence when this is used
>> +       * for tgsi_exec in softpipe we won't get f16 denorms.
>> +       */
>>         /*
>>          * Clamp to max finite value if overflowed.
>>          * OpenGL has completely undefined rounding behavior for float to
>> @@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16)
>>
>>      /* Adjust */
>>      f32.f *= magic.f;
>> +   /* XXX: The magic mul relies on denorms being available */
>>
>>      /* Inf / NaN */
>>      if (f32.f >= infnan.f)
>>
> 

Thanks for the review!

Roland