[Mesa-dev] [PATCH 2/2] mesa: add fastpath version of the format conversion loop

Timothy Arceri tarceri at itsqueeze.com
Thu Jun 15 05:26:17 UTC 2017


If all the swizzles are inside the src channels range then we can just
grab the srcs we need rather than converting everything.

perf reports convert_float() going from ~10% -> ~7% when running the
following glean test:

glean -o -v -v -v -t +pointAtten

Cc: Jason Ekstrand <jason at jlekstrand.net>
---

 Hi Jason,

 I've only perf tested the above glean test. What did you use to benchmark
 this when you wrote it?

 Thanks,
 Tim

 src/mesa/main/format_utils.c | 84 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 65e65d4..1649ac0 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(void *dst,
  *
  * \param   DST_TYPE    the C datatype of the destination
  * \param   DST_CHANS   the number of destination channels
  * \param   SRC_TYPE    the C datatype of the source
  * \param   SRC_CHANS   the number of source channels
  * \param   CONV        an expression for converting from the source data,
  *                      storred in the variable "src", to the destination
  *                      format
  */
 #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, CONV) \
-   do {                                           \
-      int s, j;                                   \
-      for (s = 0; s < count; ++s) {               \
-         for (j = 0; j < SRC_CHANS; ++j) {        \
-            SRC_TYPE src = typed_src[j];          \
-            tmp[j] = CONV;                        \
-         }                                        \
-                                                  \
-         typed_dst[0] = tmp[swizzle_x];           \
-         if (DST_CHANS > 1) {                     \
-            typed_dst[1] = tmp[swizzle_y];        \
-            if (DST_CHANS > 2) {                  \
-               typed_dst[2] = tmp[swizzle_z];     \
-               if (DST_CHANS > 3) {               \
-                  typed_dst[3] = tmp[swizzle_w];  \
-               }                                  \
-            }                                     \
-         }                                        \
-         typed_src += SRC_CHANS;                  \
-         typed_dst += DST_CHANS;                  \
-      }                                           \
+   do {                                                  \
+      bool fast_path = false;                            \
+      if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)       \
+         fast_path = true;                               \
+      if (DST_CHANS == 2 && swizzle_x < SRC_CHANS &&     \
+          swizzle_y < SRC_CHANS)                         \
+         fast_path = true;                               \
+      if (DST_CHANS == 3 && swizzle_x < SRC_CHANS &&     \
+          swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\
+         fast_path = true;                               \
+      if (DST_CHANS == 4 && swizzle_x < SRC_CHANS &&     \
+          swizzle_y < SRC_CHANS &&                       \
+          swizzle_z < SRC_CHANS &&                       \
+          swizzle_w < SRC_CHANS)                         \
+         fast_path = true;                               \
+                                                         \
+      /* The fast path avoids copying/converting srcs we \
+       * will never use.                                 \
+       */                                                \
+      if (fast_path) {                                   \
+         for (int s = 0; s < count; ++s) {               \
+            SRC_TYPE src = typed_src[swizzle_x];         \
+            tmp[swizzle_x] = CONV;                       \
+            typed_dst[0] = tmp[swizzle_x];               \
+            if (DST_CHANS > 1) {                         \
+               SRC_TYPE src = typed_src[swizzle_y];      \
+               tmp[swizzle_y] = CONV;                    \
+               typed_dst[1] = tmp[swizzle_y];            \
+               if (DST_CHANS > 2) {                      \
+                  SRC_TYPE src = typed_src[swizzle_z];   \
+                  tmp[swizzle_z] = CONV;                 \
+                  typed_dst[2] = tmp[swizzle_z];         \
+                  if (DST_CHANS > 3) {                   \
+                     SRC_TYPE src = typed_src[swizzle_w];\
+                     tmp[swizzle_w] = CONV;              \
+                     typed_dst[3] = tmp[swizzle_w];      \
+                  }                                      \
+               }                                         \
+            }                                            \
+            typed_src += SRC_CHANS;                      \
+            typed_dst += DST_CHANS;                      \
+         }                                               \
+      } else {                                           \
+         for (int s = 0; s < count; ++s) {               \
+            for (unsigned j = 0; j < SRC_CHANS; ++j) {   \
+               SRC_TYPE src = typed_src[j];              \
+               tmp[j] = CONV;                            \
+            }                                            \
+                                                         \
+            typed_dst[0] = tmp[swizzle_x];               \
+            if (DST_CHANS > 1) {                         \
+               typed_dst[1] = tmp[swizzle_y];            \
+               if (DST_CHANS > 2) {                      \
+                  typed_dst[2] = tmp[swizzle_z];         \
+                  if (DST_CHANS > 3) {                   \
+                     typed_dst[3] = tmp[swizzle_w];      \
+                  }                                      \
+               }                                         \
+            }                                            \
+            typed_src += SRC_CHANS;                      \
+            typed_dst += DST_CHANS;                      \
+         }                                               \
+      }                                                  \
    } while (0)
 
 /**
  * Represents a single swizzle-and-convert operation
  *
  * This macro represents everything done in a single swizzle-and-convert
  * operation.  The actual work is done by the SWIZZLE_CONVERT_LOOP macro.
  * This macro acts as a wrapper that uses a nested switch to ensure that
  * all looping parameters get unrolled.
  *
-- 
2.9.4



More information about the mesa-dev mailing list