[Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop

Timothy Arceri tarceri at itsqueeze.com
Thu Jun 15 05:56:57 UTC 2017


On 15/06/17 15:34, Jason Ekstrand wrote:
> On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri <tarceri at itsqueeze.com 
> <mailto:tarceri at itsqueeze.com>> wrote:
> 
>     If all the swizzles are inside the src channels range then we can just
>     grab the srcs we need rather than converting everything.
> 
>     perf reports convert_float() going from ~10% -> ~7% when
>     running the following glean test:
> 
>     glean -o -v -v -v -t +pointAtten
> 
>     Cc: Jason Ekstrand <jason at jlekstrand.net <mailto:jason at jlekstrand.net>>
>     ---
> 
>       Hi Jason,
> 
>       I've only perf tested the above glean test. What did you use to
>     benchmark
>       this when you wrote it?
> 
> 
> The teximage-colors test has a benchmark flag which I added at the 
> time.  I trust that a lot more than some random glean test. :-)

Cool, thanks :) I'm seeing up to a 5x improvement in some tests; otherwise 
results are largely unchanged :)

> 
> --Jason
> 
>       Thanks,
>       Tim
> 
>       src/mesa/main/format_utils.c | 84
>     +++++++++++++++++++++++++++++++++-----------
>       1 file changed, 63 insertions(+), 21 deletions(-)
> 
>     diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
>     index 65e65d4..1649ac0 100644
>     --- a/src/mesa/main/format_utils.c
>     +++ b/src/mesa/main/format_utils.c
>     @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
>        *
>        * \param   DST_TYPE    the C datatype of the destination
>        * \param   DST_CHANS   the number of destination channels
>        * \param   SRC_TYPE    the C datatype of the source
>        * \param   SRC_CHANS   the number of source channels
>        * \param   CONV        an expression for converting from the
>     source data,
>        *                      storred in the variable "src", to the
>     destination
>        *                      format
>        */
>       #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE,
>     SRC_CHANS, CONV) \
>     -   do {                                           \
>     -      int s, j;                                   \
>     -      for (s = 0; s < count; ++s) {               \
>     -         for (j = 0; j < SRC_CHANS; ++j) {        \
>     -            SRC_TYPE src = typed_src[j];          \
>     -            tmp[j] = CONV;                        \
>     -         }                                        \
>     -                                                  \
>     -         typed_dst[0] = tmp[swizzle_x];           \
>     -         if (DST_CHANS > 1) {                     \
>     -            typed_dst[1] = tmp[swizzle_y];        \
>     -            if (DST_CHANS > 2) {                  \
>     -               typed_dst[2] = tmp[swizzle_z];     \
>     -               if (DST_CHANS > 3) {               \
>     -                  typed_dst[3] = tmp[swizzle_w];  \
>     -               }                                  \
>     -            }                                     \
>     -         }                                        \
>     -         typed_src += SRC_CHANS;                  \
>     -         typed_dst += DST_CHANS;                  \
>     -      }                                           \
>     +   do {                                                  \
>     +      bool fast_path = false;                            \
>     +      if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)       \
>     +         fast_path = true;                               \
>     +      if (DST_CHANS == 2 && swizzle_x < SRC_CHANS &&     \
>     +          swizzle_y < SRC_CHANS)                         \
>     +         fast_path = true;                               \
>     +      if (DST_CHANS == 3 && swizzle_x < SRC_CHANS &&     \
>     +          swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
>     +         fast_path = true;                               \
>     +      if (DST_CHANS == 4 && swizzle_x < SRC_CHANS &&     \
>     +          swizzle_y < SRC_CHANS &&                       \
>     +          swizzle_z < SRC_CHANS &&                       \
>     +          swizzle_w < SRC_CHANS)                         \
>     +         fast_path = true;                               \
>     +                                                         \
>     +      /* The fast path avoids copying/converting srcs we \
>     +       * will never use.                                 \
>     +       */                                                \
>     +      if (fast_path) {                                   \
>     +         for (int s = 0; s < count; ++s) {               \
>     +            SRC_TYPE src = typed_src[swizzle_x];         \
>     +            tmp[swizzle_x] = CONV;                       \
>     +            typed_dst[0] = tmp[swizzle_x];               \
>     +            if (DST_CHANS > 1) {                         \
>     +               SRC_TYPE src = typed_src[swizzle_y];      \
>     +               tmp[swizzle_y] = CONV;                    \
>     +               typed_dst[1] = tmp[swizzle_y];            \
>     +               if (DST_CHANS > 2) {                      \
>     +                  SRC_TYPE src = typed_src[swizzle_z];   \
>     +                  tmp[swizzle_z] = CONV;                 \
>     +                  typed_dst[2] = tmp[swizzle_z];         \
>     +                  if (DST_CHANS > 3) {                   \
>     +                     SRC_TYPE src = typed_src[swizzle_w];\
>     +                     tmp[swizzle_w] = CONV;              \
>     +                     typed_dst[3] = tmp[swizzle_w];      \
>     +                  }                                      \
>     +               }                                         \
>     +            }                                            \
>     +            typed_src += SRC_CHANS;                      \
>     +            typed_dst += DST_CHANS;                      \
>     +         }                                               \
>     +      } else {                                           \
>     +         for (int s = 0; s < count; ++s) {               \
>     +            for (unsigned j = 0; j < SRC_CHANS; ++j) {   \
>     +               SRC_TYPE src = typed_src[j];              \
>     +               tmp[j] = CONV;                            \
>     +            }                                            \
>     +                                                         \
>     +            typed_dst[0] = tmp[swizzle_x];               \
>     +            if (DST_CHANS > 1) {                         \
>     +               typed_dst[1] = tmp[swizzle_y];            \
>     +               if (DST_CHANS > 2) {                      \
>     +                  typed_dst[2] = tmp[swizzle_z];         \
>     +                  if (DST_CHANS > 3) {                   \
>     +                     typed_dst[3] = tmp[swizzle_w];      \
>     +                  }                                      \
>     +               }                                         \
>     +            }                                            \
>     +            typed_src += SRC_CHANS;                      \
>     +            typed_dst += DST_CHANS;                      \
>     +         }                                               \
>     +      }                                                  \
>          } while (0)
> 
>       /**
>        * Represents a single swizzle-and-convert operation
>        *
>        * This macro represents everything done in a single
>     swizzle-and-convert
>        * operation.  The actual work is done by the SWIZZLE_CONVERT_LOOP
>     macro.
>        * This macro acts as a wrapper that uses a nested switch to
>     ensure that
>        * all looping parameters get unrolled.
>        *
>     --
>     2.9.4
> 
> 


More information about the mesa-dev mailing list