[Mesa-dev] [PATCH 09/20] i965/fs: Import image format conversion primitives.

Wed Jul 22 13:36:22 PDT 2015

On Tue, Jul 21, 2015 at 9:38 AM, Francisco Jerez <currojerez at riseup.net> wrote:
> Define bitfield packing, unpacking and type conversion operations in
> terms of which the image format conversion code will be implemented.
> These don't directly know about image formats: The packing and
> unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
> widths as arguments, determining the bitfield position of each
> component.  Most of the remaining functions perform integer, fixed
> point normalized, and floating point type conversions, mapping between
> a target type with per-component bit widths given by a parameter and a
> matching native representation of the same type.
>
> v2: Drop VEC4 suport.
> v3: Rebase.
> ---
>  .../drivers/dri/i965/brw_fs_surface_builder.cpp    | 263 +++++++++++++++++++++
>  1 file changed, 263 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> index 0c879db..ea1c4aa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> @@ -323,4 +323,267 @@ namespace {
>           return dst;
>        }
>     }
> +
> +   namespace image_format_conversion {
> +      using image_format_info::color_u;
> +
> +      namespace {
> +         /**
> +          * Maximum representable value in an unsigned integer with the given
> +          * number of bits.
> +          */
> +         inline unsigned
> +         scale(unsigned n)
> +         {
> +            return (1 << n) - 1;
> +         }
> +      }
> +
> +      /**
> +       * Pack the vector \p src in a bitfield given the per-component bit
> +       * shifts and widths.

You should add a comment here noting that this assumes that either
everything fits in 32 bits or that the components are homogeneous with
a power-of-two width.  More specifically, it only works if no component
spans a 32-bit boundary.

> +       */
> +      fs_reg
> +      emit_pack(const fs_builder &bld, const fs_reg &src,
> +                const color_u &shifts, const color_u &widths)
> +      {
> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
> +         bool seen[4] = {};
> +
> +         for (unsigned c = 0; c < 4; ++c) {
> +            if (widths[c]) {
> +               const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
> +
> +               /* Shift each component left to the correct bitfield position. */
> +               bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32));
> +
> +               /* Add everything up. */
> +               if (seen[shifts[c] / 32]) {
> +                  bld.OR(offset(dst, bld, shifts[c] / 32),
> +                         offset(dst, bld, shifts[c] / 32), tmp);
> +               } else {
> +                  bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
> +                  seen[shifts[c] / 32] = true;
> +               }
> +            }
> +         }
> +
> +         return dst;
> +      }
> +
> +      /**
> +       * Unpack a vector from the bitfield \p src given the per-component bit
> +       * shifts and widths.

Same comment here: this only works if no component spans a 32-bit
boundary.

> +       */
> +      fs_reg
> +      emit_unpack(const fs_builder &bld, const fs_reg &src,
> +                  const color_u &shifts, const color_u &widths)
> +      {
> +         const fs_reg dst = bld.vgrf(src.type, 4);
> +
> +         for (unsigned c = 0; c < 4; ++c) {
> +            if (widths[c]) {
> +               /* Shift left to discard the most significant bits. */
> +               bld.SHL(offset(dst, bld, c),
> +                       offset(src, bld, shifts[c] / 32),
> +                       fs_reg(32 - shifts[c] % 32 - widths[c]));
> +
> +               /* Shift back to the least significant bits using an arithmetic
> +                * shift to get sign extension on signed types.
> +                */
> +               bld.ASR(offset(dst, bld, c),
> +                       offset(dst, bld, c), fs_reg(32 - widths[c]));
> +            }
> +         }
> +
> +         return dst;
> +      }
> +
> +      /**
> +       * Convert a vector into an integer vector of the specified signedness
> +       * and bit widths, properly handling overflow.
> +       */
> +      fs_reg
> +      emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
> +                              const color_u &widths, bool is_signed)
> +      {
> +         const unsigned s = (is_signed ? 1 : 0);
> +         const fs_reg dst = bld.vgrf(
> +            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
> +
> +         for (unsigned c = 0; c < 4; ++c) {
> +            if (widths[c]) {
> +               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
> +
> +               /* Clamp to the minimum value. */
> +               if (is_signed)
> +                  bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
> +                                  fs_reg(-(int)scale(widths[c] - s) - 1),
> +                                  BRW_CONDITIONAL_G);

If it isn't signed, shouldn't you still do a min/max with zero?  Also,
I think you want to do the minmax while it's still a float.  Otherwise,
floating-point values bigger than, say, 2^32-1 may roll over.  Unless,
of course, our hardware does clamping as part of float -> int
conversion.

> +
> +               /* Clamp to the maximum value. */
> +               bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
> +                               fs_reg((int)scale(widths[c] - s)),
> +                               BRW_CONDITIONAL_L);
> +            }
> +         }
> +
> +         return dst;
> +      }
> +
> +      /**
> +       * Convert a normalized fixed-point vector of the specified signedness
> +       * and bit widths into a floating point vector.
> +       */
> +      fs_reg
> +      emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
> +                               const color_u &widths, bool is_signed)
> +      {
> +         const unsigned s = (is_signed ? 1 : 0);
> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> +
> +         for (unsigned c = 0; c < 4; ++c) {
> +            if (widths[c]) {
> +               /* Convert to float. */
> +               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
> +
> +               /* Divide by the normalization constants. */
> +               bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
> +                       fs_reg(1.0f / scale(widths[c] - s)));
> +
> +               /* Clamp to the minimum value. */
> +               if (is_signed)
> +                  bld.emit_minmax(offset(dst, bld, c),
> +                                  offset(dst, bld, c), fs_reg(-1.0f),
> +                                  BRW_CONDITIONAL_G);
> +            }
> +         }
> +         return dst;
> +      }
> +
> +      /**
> +       * Convert a floating point vector into a normalized fixed-point vector
> +       * of the specified signedness and bit widths.
> +       */
> +      fs_reg
> +      emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
> +                             const color_u &widths, bool is_signed)
> +      {
> +         const unsigned s = (is_signed ? 1 : 0);
> +         const fs_reg dst = bld.vgrf(
> +            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
> +         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
> +
> +         for (unsigned c = 0; c < 4; ++c) {
> +            if (widths[c]) {
> +               bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
> +
> +               /* Clamp to the minimum value. */
> +               if (is_signed)
> +                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
> +                                  fs_reg(-1.0f), BRW_CONDITIONAL_G);

Again, clamp to 0 for unsigned?

> +
> +               /* Clamp to the maximum value. */
> +               bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
> +                               fs_reg(1.0f), BRW_CONDITIONAL_L);
> +
> +               /* Multiply by the normalization constants. */
> +               bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
> +                       fs_reg((float)scale(widths[c] - s)));
> +
> +               /* Convert to integer. */
> +               bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
> +               bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
> +            }
> +         }
> +
> +         return dst;
> +      }
> +
> +      /**
> +       * Convert a floating point vector of the specified bit widths into a
> +       * 32-bit floating point vector.
> +       */
> +      fs_reg
> +      emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
> +                              const color_u &widths)
> +      {
> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
> +         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
> +
> +         for (unsigned c = 0; c < 4; ++c) {
> +            if (widths[c]) {
> +               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
> +
> +               /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
> +                * This works because they have a 5-bit exponent just like the
> +                * 16-bit floating point format, and they have no sign bit.
> +                */
> +               if (widths[c] < 16)
> +                  bld.SHL(offset(dst, bld, c),
> +                          offset(dst, bld, c), fs_reg(15 - widths[c]));
> +
> +               /* Convert to 32-bit floating point. */
> +               bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
> +            }
> +         }
> +
> +         return fdst;
> +      }
> +
> +      /**
> +       * Convert a vector into a floating point vector of the specified bit
> +       * widths.
> +       */
> +      fs_reg
> +      emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
> +                            const color_u &widths)
> +      {
> +         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
> +         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
> +
> +         for (unsigned c = 0; c < 4; ++c) {
> +            if (widths[c]) {
> +               bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
> +
> +               /* Clamp to the minimum value. */
> +               if (widths[c] < 16)
> +                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
> +                                  fs_reg(0.0f), BRW_CONDITIONAL_G);
> +
> +               /* Convert to 16-bit floating-point. */
> +               bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
> +
> +               /* Discard the least significant bits to get floating point
> +                * numbers of the requested width.  This works because the
> +                * 10-bit and 11-bit floating point formats have a 5-bit
> +                * exponent just like the 16-bit format, and they have no sign
> +                * bit.
> +                */
> +               if (widths[c] < 16)
> +                  bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
> +                          fs_reg(15 - widths[c]));
> +            }
> +         }
> +
> +         return dst;
> +      }
> +
> +      /**
> +       * Fill missing components of a vector with 0, 0, 0, 1.
> +       */
> +      fs_reg
> +      emit_pad(const fs_builder &bld, const fs_reg &src,
> +               const color_u &widths)
> +      {
> +         const fs_reg dst = bld.vgrf(src.type, 4);
> +         const unsigned pad[] = { 0, 0, 0, 1 };
> +
> +         for (unsigned c = 0; c < 4; ++c)
> +            bld.MOV(offset(dst, bld, c),
> +                    widths[c] ? offset(src, bld, c) : fs_reg(pad[c]));
> +
> +         return dst;
> +      }
> +   }
>  }
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev