[Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop
Jason Ekstrand
jason at jlekstrand.net
Thu Jun 15 05:34:03 UTC 2017
On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri <tarceri at itsqueeze.com>
wrote:
> If all the swizzles are inside the src channels range then we can just
> grab the srcs we need rather than converting everything.
>
> perf report shows convert_float() going from ~10% -> ~7% when
> running the following glean test:
>
> glean -o -v -v -v -t +pointAtten
>
> Cc: Jason Ekstrand <jason at jlekstrand.net>
> ---
>
> Hi Jason,
>
> I've only perf tested the above glean test. What did you use to benchmark
> this when you wrote it?
>
The teximage-colors test has a benchmark flag which I added at the time. I
trust that a lot more than some random glean test. :-)
--Jason
> Thanks,
> Tim
>
> src/mesa/main/format_utils.c | 84 ++++++++++++++++++++++++++++++
> +++-----------
> 1 file changed, 63 insertions(+), 21 deletions(-)
>
> diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
> index 65e65d4..1649ac0 100644
> --- a/src/mesa/main/format_utils.c
> +++ b/src/mesa/main/format_utils.c
> @@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
> *
> * \param DST_TYPE the C datatype of the destination
> * \param DST_CHANS the number of destination channels
> * \param SRC_TYPE the C datatype of the source
> * \param SRC_CHANS the number of source channels
> * \param CONV an expression for converting from the source data,
> * storred in the variable "src", to the destination
> * format
> */
> #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS,
> CONV) \
> - do { \
> - int s, j; \
> - for (s = 0; s < count; ++s) { \
> - for (j = 0; j < SRC_CHANS; ++j) { \
> - SRC_TYPE src = typed_src[j]; \
> - tmp[j] = CONV; \
> - } \
> - \
> - typed_dst[0] = tmp[swizzle_x]; \
> - if (DST_CHANS > 1) { \
> - typed_dst[1] = tmp[swizzle_y]; \
> - if (DST_CHANS > 2) { \
> - typed_dst[2] = tmp[swizzle_z]; \
> - if (DST_CHANS > 3) { \
> - typed_dst[3] = tmp[swizzle_w]; \
> - } \
> - } \
> - } \
> - typed_src += SRC_CHANS; \
> - typed_dst += DST_CHANS; \
> - } \
> + do { \
> + bool fast_path = false; \
> + if (DST_CHANS == 1 && swizzle_x < SRC_CHANS) \
> + fast_path = true; \
> + if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \
> + swizzle_y < SRC_CHANS) \
> + fast_path = true; \
> + if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \
> + swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
> + fast_path = true; \
> + if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \
> + swizzle_y < SRC_CHANS && \
> + swizzle_z < SRC_CHANS && \
> + swizzle_w < SRC_CHANS) \
> + fast_path = true; \
> + \
> + /* The fast path avoids copying/converting srcs we \
> + * will never use. \
> + */ \
> + if (fast_path) { \
> + for (int s = 0; s < count; ++s) { \
> + SRC_TYPE src = typed_src[swizzle_x]; \
> + tmp[swizzle_x] = CONV; \
> + typed_dst[0] = tmp[swizzle_x]; \
> + if (DST_CHANS > 1) { \
> + SRC_TYPE src = typed_src[swizzle_y]; \
> + tmp[swizzle_y] = CONV; \
> + typed_dst[1] = tmp[swizzle_y]; \
> + if (DST_CHANS > 2) { \
> + SRC_TYPE src = typed_src[swizzle_z]; \
> + tmp[swizzle_z] = CONV; \
> + typed_dst[2] = tmp[swizzle_z]; \
> + if (DST_CHANS > 3) { \
> + SRC_TYPE src = typed_src[swizzle_w];\
> + tmp[swizzle_w] = CONV; \
> + typed_dst[3] = tmp[swizzle_w]; \
> + } \
> + } \
> + } \
> + typed_src += SRC_CHANS; \
> + typed_dst += DST_CHANS; \
> + } \
> + } else { \
> + for (int s = 0; s < count; ++s) { \
> + for (unsigned j = 0; j < SRC_CHANS; ++j) { \
> + SRC_TYPE src = typed_src[j]; \
> + tmp[j] = CONV; \
> + } \
> + \
> + typed_dst[0] = tmp[swizzle_x]; \
> + if (DST_CHANS > 1) { \
> + typed_dst[1] = tmp[swizzle_y]; \
> + if (DST_CHANS > 2) { \
> + typed_dst[2] = tmp[swizzle_z]; \
> + if (DST_CHANS > 3) { \
> + typed_dst[3] = tmp[swizzle_w]; \
> + } \
> + } \
> + } \
> + typed_src += SRC_CHANS; \
> + typed_dst += DST_CHANS; \
> + } \
> + } \
> } while (0)
>
> /**
> * Represents a single swizzle-and-convert operation
> *
> * This macro represents everything done in a single swizzle-and-convert
> * operation. The actual work is done by the SWIZZLE_CONVERT_LOOP macro.
> * This macro acts as a wrapper that uses a nested switch to ensure that
> * all looping parameters get unrolled.
> *
> --
> 2.9.4
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170614/96e217f3/attachment-0001.html>
More information about the mesa-dev
mailing list