[Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop

Jason Ekstrand jason at jlekstrand.net
Thu Jun 15 05:34:03 UTC 2017


On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri <tarceri at itsqueeze.com>
wrote:

> If all the swizzles are inside the src channels range then we can just
> grab the srcs we need rather than converting everything.
>
> perf reports convert_float() going from ~10% -> ~7% when
> running the following glean test:
>
> glean -o -v -v -v -t +pointAtten
>
> Cc: Jason Ekstrand <jason at jlekstrand.net>
> ---
>
>  Hi Jason,
>
>  I've only perf tested the above glean test. What did you use to benchmark
>  this when you wrote it?
>

The teximage-colors test has a benchmark flag which I added at the time.  I
trust that a lot more than some random glean test. :-)

--Jason


>  Thanks,
>  Tim
>
>  src/mesa/main/format_utils.c | 84 ++++++++++++++++++++++++++++++
> +++-----------
>  1 file changed, 63 insertions(+), 21 deletions(-)
>
> diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
> index 65e65d4..1649ac0 100644
> --- a/src/mesa/main/format_utils.c
> +++ b/src/mesa/main/format_utils.c
> @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
>   *
>   * \param   DST_TYPE    the C datatype of the destination
>   * \param   DST_CHANS   the number of destination channels
>   * \param   SRC_TYPE    the C datatype of the source
>   * \param   SRC_CHANS   the number of source channels
>   * \param   CONV        an expression for converting from the source data,
>   *                      storred in the variable "src", to the destination
>   *                      format
>   */
>  #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS,
> CONV) \
> -   do {                                           \
> -      int s, j;                                   \
> -      for (s = 0; s < count; ++s) {               \
> -         for (j = 0; j < SRC_CHANS; ++j) {        \
> -            SRC_TYPE src = typed_src[j];          \
> -            tmp[j] = CONV;                        \
> -         }                                        \
> -                                                  \
> -         typed_dst[0] = tmp[swizzle_x];           \
> -         if (DST_CHANS > 1) {                     \
> -            typed_dst[1] = tmp[swizzle_y];        \
> -            if (DST_CHANS > 2) {                  \
> -               typed_dst[2] = tmp[swizzle_z];     \
> -               if (DST_CHANS > 3) {               \
> -                  typed_dst[3] = tmp[swizzle_w];  \
> -               }                                  \
> -            }                                     \
> -         }                                        \
> -         typed_src += SRC_CHANS;                  \
> -         typed_dst += DST_CHANS;                  \
> -      }                                           \
> +   do {                                                  \
> +      bool fast_path = false;                            \
> +      if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)       \
> +         fast_path = true;                               \
> +      if (DST_CHANS == 2 && swizzle_x < SRC_CHANS &&     \
> +          swizzle_y < SRC_CHANS)                         \
> +         fast_path = true;                               \
> +      if (DST_CHANS == 3 && swizzle_x < SRC_CHANS &&     \
> +          swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
> +         fast_path = true;                               \
> +      if (DST_CHANS == 4 && swizzle_x < SRC_CHANS &&     \
> +          swizzle_y < SRC_CHANS &&                       \
> +          swizzle_z < SRC_CHANS &&                       \
> +          swizzle_w < SRC_CHANS)                         \
> +         fast_path = true;                               \
> +                                                         \
> +      /* The fast path avoids copying/converting srcs we \
> +       * will never use.                                 \
> +       */                                                \
> +      if (fast_path) {                                   \
> +         for (int s = 0; s < count; ++s) {               \
> +            SRC_TYPE src = typed_src[swizzle_x];         \
> +            tmp[swizzle_x] = CONV;                       \
> +            typed_dst[0] = tmp[swizzle_x];               \
> +            if (DST_CHANS > 1) {                         \
> +               SRC_TYPE src = typed_src[swizzle_y];      \
> +               tmp[swizzle_y] = CONV;                    \
> +               typed_dst[1] = tmp[swizzle_y];            \
> +               if (DST_CHANS > 2) {                      \
> +                  SRC_TYPE src = typed_src[swizzle_z];   \
> +                  tmp[swizzle_z] = CONV;                 \
> +                  typed_dst[2] = tmp[swizzle_z];         \
> +                  if (DST_CHANS > 3) {                   \
> +                     SRC_TYPE src = typed_src[swizzle_w];\
> +                     tmp[swizzle_w] = CONV;              \
> +                     typed_dst[3] = tmp[swizzle_w];      \
> +                  }                                      \
> +               }                                         \
> +            }                                            \
> +            typed_src += SRC_CHANS;                      \
> +            typed_dst += DST_CHANS;                      \
> +         }                                               \
> +      } else {                                           \
> +         for (int s = 0; s < count; ++s) {               \
> +            for (unsigned j = 0; j < SRC_CHANS; ++j) {   \
> +               SRC_TYPE src = typed_src[j];              \
> +               tmp[j] = CONV;                            \
> +            }                                            \
> +                                                         \
> +            typed_dst[0] = tmp[swizzle_x];               \
> +            if (DST_CHANS > 1) {                         \
> +               typed_dst[1] = tmp[swizzle_y];            \
> +               if (DST_CHANS > 2) {                      \
> +                  typed_dst[2] = tmp[swizzle_z];         \
> +                  if (DST_CHANS > 3) {                   \
> +                     typed_dst[3] = tmp[swizzle_w];      \
> +                  }                                      \
> +               }                                         \
> +            }                                            \
> +            typed_src += SRC_CHANS;                      \
> +            typed_dst += DST_CHANS;                      \
> +         }                                               \
> +      }                                                  \
>     } while (0)
>
>  /**
>   * Represents a single swizzle-and-convert operation
>   *
>   * This macro represents everything done in a single swizzle-and-convert
>   * operation.  The actual work is done by the SWIZZLE_CONVERT_LOOP macro.
>   * This macro acts as a wrapper that uses a nested switch to ensure that
>   * all looping parameters get unrolled.
>   *
> --
> 2.9.4
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170614/96e217f3/attachment-0001.html>


More information about the mesa-dev mailing list