[Mesa-dev] [PATCH v4 22/44] i965/fs: Helpers for un/shuffle 16-bit pairs in 32-bit components

Thu Nov 30 22:21:34 UTC 2017

On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo <
jmcasanova at igalia.com> wrote:

> This helpers are used to load/store 16-bit types from/to 32-bit
> components.
>
> The functions shuffle_32bit_load_result_to_16bit_data and
> shuffle_16bit_data_for_32bit_write are implemented in a similar
> way than the analogous functions for handling 64-bit types.
> ---
>  src/intel/compiler/brw_fs.h       | 11 +++++++++
>  src/intel/compiler/brw_fs_nir.cpp | 51 ++++++++++++++++++++++++++++++
> +++++++++
>  2 files changed, 62 insertions(+)
>
> diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
> index 19b897e7a9..30557324d5 100644
> --- a/src/intel/compiler/brw_fs.h
> +++ b/src/intel/compiler/brw_fs.h
> @@ -497,6 +497,17 @@ void shuffle_32bit_load_result_to_64bit_data(const
> brw::fs_builder &bld,
>  fs_reg shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld,
>                                            const fs_reg &src,
>                                            uint32_t components);
> +
> +void shuffle_32bit_load_result_to_16bit_data(const brw::fs_builder &bld,
> +                                             const fs_reg &dst,
> +                                             const fs_reg &src,
> +                                             uint32_t components);
> +
> +void shuffle_16bit_data_for_32bit_write(const brw::fs_builder &bld,
> +                                        const fs_reg &dst,
> +                                        const fs_reg &src,
> +                                        uint32_t components);
> +
>  fs_reg setup_imm_df(const brw::fs_builder &bld,
>                      double v);
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index 726b2fcee7..c091241132 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -4828,6 +4828,33 @@ shuffle_32bit_load_result_to_64bit_data(const
> fs_builder &bld,
>     }
>  }
>
> +void
> +shuffle_32bit_load_result_to_16bit_data(const fs_builder &bld,
> +                                        const fs_reg &dst,
> +                                        const fs_reg &src,
> +                                        uint32_t components)
> +{
> +   assert(type_sz(src.type) == 4);
> +   assert(type_sz(dst.type) == 2);
> +
> +   fs_reg tmp = retype(bld.vgrf(src.type), dst.type);
> +
> +   for (unsigned i = 0; i < components; i++) {
> +      const fs_reg component_i = subscript(offset(src, bld, i / 2),
> dst.type, i % 2);
> +
> +      bld.MOV(offset(tmp, bld, i % 2), component_i);
> +
> +      if (i % 2) {
> +         bld.MOV(offset(dst, bld, i -1), offset(tmp, bld, 0));
> +         bld.MOV(offset(dst, bld, i), offset(tmp, bld, 1));
> +      }
>

I'm very confused by this extra moving.  Why can't we just do

bld.MOV(offset(dst, bld, i), component_i);

above?  Maybe I'm missing something but I don't see why the extra moves are
needed.

> +   }
> +   if (components % 2) {
> +      bld.MOV(offset(dst, bld, components - 1), tmp);
> +   }
> +}
> +
> +
>  /**
>   * This helper does the inverse operation of
>   * SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT_DATA.
> @@ -4860,6 +4887,30 @@ shuffle_64bit_data_for_32bit_write(const
> fs_builder &bld,
>     return dst;
>  }
>
> +void
> +shuffle_16bit_data_for_32bit_write(const fs_builder &bld,
> +                                   const fs_reg &dst,
> +                                   const fs_reg &src,
> +                                   uint32_t components)
> +{
> +   assert(type_sz(src.type) == 2);
> +   assert(type_sz(dst.type) == 4);
> +
> +   fs_reg tmp = bld.vgrf(dst.type);
> +
> +   for (unsigned i = 0; i < components; i++) {
> +      const fs_reg component_i = offset(src, bld, i);
> +      bld.MOV(subscript(tmp, src.type, i % 2), component_i);
> +      if (i % 2) {
> +         bld.MOV(offset(dst, bld, i / 2), tmp);
> +      }
>

Again, why the extra MOVs?  Why not

bld.MOV(subscript(offset(tmp, bld, i / 2), src.type, i % 2), component_i);

instead of the extra MOVs?

--Jason

> +   }
> +   if (components % 2) {
> +      bld.MOV(offset(dst, bld, components / 2), tmp);
> +   }
> +}
> +
> +
>  fs_reg
>  setup_imm_df(const fs_builder &bld, double v)
>  {
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171130/034f6da7/attachment-0001.html>