[Mesa-dev] [PATCH 09/20] i965/fs: Import image format conversion primitives.
Jason Ekstrand
jason at jlekstrand.net
Wed Jul 22 13:36:22 PDT 2015
On Tue, Jul 21, 2015 at 9:38 AM, Francisco Jerez <currojerez at riseup.net> wrote:
> Define bitfield packing, unpacking and type conversion operations in
> terms of which the image format conversion code will be implemented.
> These don't directly know about image formats: The packing and
> unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
> widths as arguments, determining the bitfield position of each
> component. Most of the remaining functions perform integer, fixed
> point normalized, and floating point type conversions, mapping between
> a target type with per-component bit widths given by a parameter and a
> matching native representation of the same type.
>
> v2: Drop VEC4 support.
> v3: Rebase.
> ---
> .../drivers/dri/i965/brw_fs_surface_builder.cpp | 263 +++++++++++++++++++++
> 1 file changed, 263 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> index 0c879db..ea1c4aa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> @@ -323,4 +323,267 @@ namespace {
> return dst;
> }
> }
> +
> + namespace image_format_conversion {
> + using image_format_info::color_u;
> +
> + namespace {
> + /**
> + * Maximum representable value in an unsigned integer with the given
> + * number of bits.
> + */
> + inline unsigned
> + scale(unsigned n)
> + {
> + return (1 << n) - 1;
> + }
> + }
> +
> + /**
> + * Pack the vector \p src in a bitfield given the per-component bit
> + * shifts and widths.
You should comment in here that it assumes that either everything fits
in 32 bits or that it is homogeneous with a power-of-two width. More
specifically, it only works if no component spans a 32-bit boundary.
> + */
> + fs_reg
> + emit_pack(const fs_builder &bld, const fs_reg &src,
> + const color_u &shifts, const color_u &widths)
> + {
> + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
> + bool seen[4] = {};
> +
> + for (unsigned c = 0; c < 4; ++c) {
> + if (widths[c]) {
> + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
> +
> + /* Shift each component left to the correct bitfield position. */
> + bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32));
> +
> + /* Add everything up. */
> + if (seen[shifts[c] / 32]) {
> + bld.OR(offset(dst, bld, shifts[c] / 32),
> + offset(dst, bld, shifts[c] / 32), tmp);
> + } else {
> + bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
> + seen[shifts[c] / 32] = true;
> + }
> + }
> + }
> +
> + return dst;
> + }
> +
> + /**
> + * Unpack a vector from the bitfield \p src given the per-component bit
> + * shifts and widths.
Same comment here.
> + */
> + fs_reg
> + emit_unpack(const fs_builder &bld, const fs_reg &src,
> + const color_u &shifts, const color_u &widths)
> + {
> + const fs_reg dst = bld.vgrf(src.type, 4);
> +
> + for (unsigned c = 0; c < 4; ++c) {
> + if (widths[c]) {
> + /* Shift left to discard the most significant bits. */
> + bld.SHL(offset(dst, bld, c),
> + offset(src, bld, shifts[c] / 32),
> + fs_reg(32 - shifts[c] % 32 - widths[c]));
> +
> + /* Shift back to the least significant bits using an arithmetic
> + * shift to get sign extension on signed types.
> + */
> + bld.ASR(offset(dst, bld, c),
> + offset(dst, bld, c), fs_reg(32 - widths[c]));
> + }
> + }
> +
> + return dst;
> + }
> +
> + /**
> + * Convert a vector into an integer vector of the specified signedness
> + * and bit widths, properly handling overflow.
> + */
> + fs_reg
> + emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
> + const color_u &widths, bool is_signed)
> + {
> + const unsigned s = (is_signed ? 1 : 0);
> + const fs_reg dst = bld.vgrf(
> + is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
> +
> + for (unsigned c = 0; c < 4; ++c) {
> + if (widths[c]) {
> + bld.MOV(offset(dst, bld, c), offset(src, bld, c));
> +
> + /* Clamp to the minimum value. */
> + if (is_signed)
> + bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
> + fs_reg(-(int)scale(widths[c] - s) - 1),
> + BRW_CONDITIONAL_G);
If it isn't signed, shouldn't you still do a min/max with zero? Also,
I think you want to do the minmax while it's still a float. Otherwise,
floating-point values bigger than, say, 2^32-1 may roll over. Unless,
of course, our hardware does clamping as part of float -> int
conversion.
> +
> + /* Clamp to the maximum value. */
> + bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
> + fs_reg((int)scale(widths[c] - s)),
> + BRW_CONDITIONAL_L);
> + }
> + }
> +
> + return dst;
> + }
> +
> + /**
> + * Convert a normalized fixed-point vector of the specified signedness
> + * and bit widths into a floating point vector.
> + */
> + fs_reg
> + emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
> + const color_u &widths, bool is_signed)
> + {
> + const unsigned s = (is_signed ? 1 : 0);
> + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> +
> + for (unsigned c = 0; c < 4; ++c) {
> + if (widths[c]) {
> + /* Convert to float. */
> + bld.MOV(offset(dst, bld, c), offset(src, bld, c));
> +
> + /* Divide by the normalization constants. */
> + bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
> + fs_reg(1.0f / scale(widths[c] - s)));
> +
> + /* Clamp to the minimum value. */
> + if (is_signed)
> + bld.emit_minmax(offset(dst, bld, c),
> + offset(dst, bld, c), fs_reg(-1.0f),
> + BRW_CONDITIONAL_G);
> + }
> + }
> + return dst;
> + }
> +
> + /**
> + * Convert a floating point vector into a normalized fixed-point vector
> + * of the specified signedness and bit widths.
> + */
> + fs_reg
> + emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
> + const color_u &widths, bool is_signed)
> + {
> + const unsigned s = (is_signed ? 1 : 0);
> + const fs_reg dst = bld.vgrf(
> + is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
> + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
> +
> + for (unsigned c = 0; c < 4; ++c) {
> + if (widths[c]) {
> + bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
> +
> + /* Clamp to the minimum value. */
> + if (is_signed)
> + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
> + fs_reg(-1.0f), BRW_CONDITIONAL_G);
Again, clamp to 0 for unsigned?
> +
> + /* Clamp to the maximum value. */
> + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
> + fs_reg(1.0f), BRW_CONDITIONAL_L);
> +
> + /* Multiply by the normalization constants. */
> + bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
> + fs_reg((float)scale(widths[c] - s)));
> +
> + /* Convert to integer. */
> + bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
> + bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
> + }
> + }
> +
> + return dst;
> + }
> +
> + /**
> + * Convert a floating point vector of the specified bit widths into a
> + * 32-bit floating point vector.
> + */
> + fs_reg
> + emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
> + const color_u &widths)
> + {
> + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
> + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
> +
> + for (unsigned c = 0; c < 4; ++c) {
> + if (widths[c]) {
> + bld.MOV(offset(dst, bld, c), offset(src, bld, c));
> +
> + /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
> + * This works because they have a 5-bit exponent just like the
> + * 16-bit floating point format, and they have no sign bit.
> + */
> + if (widths[c] < 16)
> + bld.SHL(offset(dst, bld, c),
> + offset(dst, bld, c), fs_reg(15 - widths[c]));
> +
> + /* Convert to 32-bit floating point. */
> + bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
> + }
> + }
> +
> + return fdst;
> + }
> +
> + /**
> + * Convert a vector into a floating point vector of the specified bit
> + * widths.
> + */
> + fs_reg
> + emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
> + const color_u &widths)
> + {
> + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
> + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
> +
> + for (unsigned c = 0; c < 4; ++c) {
> + if (widths[c]) {
> + bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
> +
> + /* Clamp to the minimum value. */
> + if (widths[c] < 16)
> + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
> + fs_reg(0.0f), BRW_CONDITIONAL_G);
> +
> + /* Convert to 16-bit floating-point. */
> + bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
> +
> + /* Discard the least significant bits to get floating point
> + * numbers of the requested width. This works because the
> + * 10-bit and 11-bit floating point formats have a 5-bit
> + * exponent just like the 16-bit format, and they have no sign
> + * bit.
> + */
> + if (widths[c] < 16)
> + bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
> + fs_reg(15 - widths[c]));
> + }
> + }
> +
> + return dst;
> + }
> +
> + /**
> + * Fill missing components of a vector with 0, 0, 0, 1.
> + */
> + fs_reg
> + emit_pad(const fs_builder &bld, const fs_reg &src,
> + const color_u &widths)
> + {
> + const fs_reg dst = bld.vgrf(src.type, 4);
> + const unsigned pad[] = { 0, 0, 0, 1 };
> +
> + for (unsigned c = 0; c < 4; ++c)
> + bld.MOV(offset(dst, bld, c),
> + widths[c] ? offset(src, bld, c) : fs_reg(pad[c]));
> +
> + return dst;
> + }
> + }
> }
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list