[Mesa-dev] [PATCH 09/20] i965/fs: Import image format conversion primitives.

Thu Jul 23 04:24:43 PDT 2015

Jason Ekstrand <jason at jlekstrand.net> writes:

> On Tue, Jul 21, 2015 at 9:38 AM, Francisco Jerez <currojerez at riseup.net> wrote:
>> Define bitfield packing, unpacking and type conversion operations in
>> terms of which the image format conversion code will be implemented.
>> These don't directly know about image formats: The packing and
>> unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
>> widths as arguments, determining the bitfield position of each
>> component.  Most of the remaining functions perform integer, fixed
>> point normalized, and floating point type conversions, mapping between
>> a target type with per-component bit widths given by a parameter and a
>> matching native representation of the same type.
>>
>> v2: Drop VEC4 support.
>> v3: Rebase.
>> ---
>>  .../drivers/dri/i965/brw_fs_surface_builder.cpp    | 263 +++++++++++++++++++++
>>  1 file changed, 263 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> index 0c879db..ea1c4aa 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> @@ -323,4 +323,267 @@ namespace {
>>           return dst;
>>        }
>>     }
>> +
>> +   namespace image_format_conversion {
>> +      using image_format_info::color_u;
>> +
>> +      namespace {
>> +         /**
>> +          * Maximum representable value in an unsigned integer with the given
>> +          * number of bits.
>> +          */
>> +         inline unsigned
>> +         scale(unsigned n)
>> +         {
>> +            return (1 << n) - 1;
>> +         }
>> +      }
>> +
>> +      /**
>> +       * Pack the vector \p src in a bitfield given the per-component bit
>> +       * shifts and widths.
>
> You should comment in here that it assumes that either everything fits
> in 32 bits or that it is homogeneous with a power-of-two width.

I don't think that's assumed here: non-power-of-two widths should work
regardless of whether the components sum up to more than 32 bits.

> More specifically, it only works if no component spans a 32-bit
> boundary.

That's right, no component may cross a 32-bit boundary, I'll mention
that in the comment.

>
>> +       */
>> +      fs_reg
>> +      emit_pack(const fs_builder &bld, const fs_reg &src,
>> +                const color_u &shifts, const color_u &widths)
>> +      {
>> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
>> +         bool seen[4] = {};
>> +
>> +         for (unsigned c = 0; c < 4; ++c) {
>> +            if (widths[c]) {
>> +               const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
>> +
>> +               /* Shift each component left to the correct bitfield position. */
>> +               bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32));
>> +
>> +               /* Add everything up. */
>> +               if (seen[shifts[c] / 32]) {
>> +                  bld.OR(offset(dst, bld, shifts[c] / 32),
>> +                         offset(dst, bld, shifts[c] / 32), tmp);
>> +               } else {
>> +                  bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
>> +                  seen[shifts[c] / 32] = true;
>> +               }
>> +            }
>> +         }
>> +
>> +         return dst;
>> +      }
>> +
>> +      /**
>> +       * Unpack a vector from the bitfield \p src given the per-component bit
>> +       * shifts and widths.
>
> Same comment here.
>
>> +       */
>> +      fs_reg
>> +      emit_unpack(const fs_builder &bld, const fs_reg &src,
>> +                  const color_u &shifts, const color_u &widths)
>> +      {
>> +         const fs_reg dst = bld.vgrf(src.type, 4);
>> +
>> +         for (unsigned c = 0; c < 4; ++c) {
>> +            if (widths[c]) {
>> +               /* Shift left to discard the most significant bits. */
>> +               bld.SHL(offset(dst, bld, c),
>> +                       offset(src, bld, shifts[c] / 32),
>> +                       fs_reg(32 - shifts[c] % 32 - widths[c]));
>> +
>> +               /* Shift back to the least significant bits using an arithmetic
>> +                * shift to get sign extension on signed types.
>> +                */
>> +               bld.ASR(offset(dst, bld, c),
>> +                       offset(dst, bld, c), fs_reg(32 - widths[c]));
>> +            }
>> +         }
>> +
>> +         return dst;
>> +      }
>> +
>> +      /**
>> +       * Convert a vector into an integer vector of the specified signedness
>> +       * and bit widths, properly handling overflow.
>> +       */
>> +      fs_reg
>> +      emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
>> +                              const color_u &widths, bool is_signed)
>> +      {
>> +         const unsigned s = (is_signed ? 1 : 0);
>> +         const fs_reg dst = bld.vgrf(
>> +            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
>> +
>> +         for (unsigned c = 0; c < 4; ++c) {
>> +            if (widths[c]) {
>> +               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
>> +
>> +               /* Clamp to the minimum value. */
>> +               if (is_signed)
>> +                  bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
>> +                                  fs_reg(-(int)scale(widths[c] - s) - 1),
>> +                                  BRW_CONDITIONAL_G);
>
> If it isn't signed, shouldn't you still do a min/max with zero?

No, it's not necessary: if the datatype is unsigned the argument will
never be lower than zero, so only one of the overflow cases needs to be
handled.  Note that the value of "is_signed" is already determined from
the argument type alone; it's passed in that way mainly for symmetry
with emit_convert_to_scaled() and because I didn't quite trust the
visitor to have set the source type correctly when calling
emit_image_store().  I could add 'assert(src.type == dst.type)' here if
you like.

> Also, I think you want to do the minmax while it's still a float.

It never is.  The argument of this function must already be an integer;
I'll clarify that in the documentation.

> Otherwise, floating-point values bigger than, say 2^32-1, may roll
> over.  Unless, of course, our hardware does clamping as part of float
> -> int conversion.
>
>> +
>> +               /* Clamp to the maximum value. */
>> +               bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
>> +                               fs_reg((int)scale(widths[c] - s)),
>> +                               BRW_CONDITIONAL_L);
>> +            }
>> +         }
>> +
>> +         return dst;
>> +      }
>> +
>> +      /**
>> +       * Convert a normalized fixed-point vector of the specified signedness
>> +       * and bit widths into a floating point vector.
>> +       */
>> +      fs_reg
>> +      emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
>> +                               const color_u &widths, bool is_signed)
>> +      {
>> +         const unsigned s = (is_signed ? 1 : 0);
>> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
>> +
>> +         for (unsigned c = 0; c < 4; ++c) {
>> +            if (widths[c]) {
>> +               /* Convert to float. */
>> +               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
>> +
>> +               /* Divide by the normalization constants. */
>> +               bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
>> +                       fs_reg(1.0f / scale(widths[c] - s)));
>> +
>> +               /* Clamp to the minimum value. */
>> +               if (is_signed)
>> +                  bld.emit_minmax(offset(dst, bld, c),
>> +                                  offset(dst, bld, c), fs_reg(-1.0f),
>> +                                  BRW_CONDITIONAL_G);
>> +            }
>> +         }
>> +         return dst;
>> +      }
>> +
>> +      /**
>> +       * Convert a floating point vector into a normalized fixed-point vector
>> +       * of the specified signedness and bit widths.
>> +       */
>> +      fs_reg
>> +      emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
>> +                             const color_u &widths, bool is_signed)
>> +      {
>> +         const unsigned s = (is_signed ? 1 : 0);
>> +         const fs_reg dst = bld.vgrf(
>> +            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
>> +         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
>> +
>> +         for (unsigned c = 0; c < 4; ++c) {
>> +            if (widths[c]) {
>> +               bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
>> +
>> +               /* Clamp to the minimum value. */
>> +               if (is_signed)
>> +                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
>> +                                  fs_reg(-1.0f), BRW_CONDITIONAL_G);
>
> Again, clamp to 0 for unsigned?
>
Hmm, this one could lead to actual bugs.  I'll fix it by doing a
saturating MOV in the !is_signed case instead of the min/max sequence.

Thanks.

>> +
>> +               /* Clamp to the maximum value. */
>> +               bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
>> +                               fs_reg(1.0f), BRW_CONDITIONAL_L);
>> +
>> +               /* Multiply by the normalization constants. */
>> +               bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
>> +                       fs_reg((float)scale(widths[c] - s)));
>> +
>> +               /* Convert to integer. */
>> +               bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
>> +               bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
>> +            }
>> +         }
>> +
>> +         return dst;
>> +      }
>> +
>> +      /**
>> +       * Convert a floating point vector of the specified bit widths into a
>> +       * 32-bit floating point vector.
>> +       */
>> +      fs_reg
>> +      emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
>> +                              const color_u &widths)
>> +      {
>> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
>> +         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
>> +
>> +         for (unsigned c = 0; c < 4; ++c) {
>> +            if (widths[c]) {
>> +               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
>> +
>> +               /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
>> +                * This works because they have a 5-bit exponent just like the
>> +                * 16-bit floating point format, and they have no sign bit.
>> +                */
>> +               if (widths[c] < 16)
>> +                  bld.SHL(offset(dst, bld, c),
>> +                          offset(dst, bld, c), fs_reg(15 - widths[c]));
>> +
>> +               /* Convert to 32-bit floating point. */
>> +               bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
>> +            }
>> +         }
>> +
>> +         return fdst;
>> +      }
>> +
>> +      /**
>> +       * Convert a vector into a floating point vector of the specified bit
>> +       * widths.
>> +       */
>> +      fs_reg
>> +      emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
>> +                            const color_u &widths)
>> +      {
>> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
>> +         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
>> +
>> +         for (unsigned c = 0; c < 4; ++c) {
>> +            if (widths[c]) {
>> +               bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
>> +
>> +               /* Clamp to the minimum value. */
>> +               if (widths[c] < 16)
>> +                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
>> +                                  fs_reg(0.0f), BRW_CONDITIONAL_G);
>> +
>> +               /* Convert to 16-bit floating-point. */
>> +               bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
>> +
>> +               /* Discard the least significant bits to get floating point
>> +                * numbers of the requested width.  This works because the
>> +                * 10-bit and 11-bit floating point formats have a 5-bit
>> +                * exponent just like the 16-bit format, and they have no sign
>> +                * bit.
>> +                */
>> +               if (widths[c] < 16)
>> +                  bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
>> +                          fs_reg(15 - widths[c]));
>> +            }
>> +         }
>> +
>> +         return dst;
>> +      }
>> +
>> +      /**
>> +       * Fill missing components of a vector with 0, 0, 0, 1.
>> +       */
>> +      fs_reg
>> +      emit_pad(const fs_builder &bld, const fs_reg &src,
>> +               const color_u &widths)
>> +      {
>> +         const fs_reg dst = bld.vgrf(src.type, 4);
>> +         const unsigned pad[] = { 0, 0, 0, 1 };
>> +
>> +         for (unsigned c = 0; c < 4; ++c)
>> +            bld.MOV(offset(dst, bld, c),
>> +                    widths[c] ? offset(src, bld, c) : fs_reg(pad[c]));
>> +
>> +         return dst;
>> +      }
>> +   }
>>  }
>> --
>> 2.4.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150723/91a35ac9/attachment.sig>