<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri <span dir="ltr">&lt;<a href="mailto:tarceri@itsqueeze.com" target="_blank">tarceri@itsqueeze.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">If all the swizzles are inside the src channels range then we can just<br>
grab the srcs we need rather than converting everything.<br>
<br>
perf report shows convert_float() going from ~10% -> ~7% when<br>
running the following glean test:<br>
<br>
glean -o -v -v -v -t +pointAtten<br>
<br>
Cc: Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>&gt;<br>
---<br>
<br>
 Hi Jason,<br>
<br>
 I've only perf tested the above glean test. What did you use to benchmark<br>
 this when you wrote it?<br></blockquote><div><br></div><div>The teximage-colors test has a benchmark flag which I added at the time.  I trust that a lot more than some random glean test. :-)<br><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
 Thanks,<br>
 Tim<br>
<br>
 src/mesa/main/format_utils.c | 84 ++++++++++++++++++++++++++++++<wbr>+++-----------<br>
 1 file changed, 63 insertions(+), 21 deletions(-)<br>
<br>
diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c<br>
index 65e65d4..1649ac0 100644<br>
--- a/src/mesa/main/format_utils.c<br>
+++ b/src/mesa/main/format_utils.c<br>
@@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(<wbr>void *dst,<br>
  *<br>
  * \param   DST_TYPE    the C datatype of the destination<br>
  * \param   DST_CHANS   the number of destination channels<br>
  * \param   SRC_TYPE    the C datatype of the source<br>
  * \param   SRC_CHANS   the number of source channels<br>
  * \param   CONV        an expression for converting from the source data,<br>
  *                      storred in the variable "src", to the destination<br>
  *                      format<br>
  */<br>
 #define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, CONV) \<br>
-   do {                                           \<br>
-      int s, j;                                   \<br>
-      for (s = 0; s < count; ++s) {               \<br>
-         for (j = 0; j < SRC_CHANS; ++j) {        \<br>
-            SRC_TYPE src = typed_src[j];          \<br>
-            tmp[j] = CONV;                        \<br>
-         }                                        \<br>
-                                                  \<br>
-         typed_dst[0] = tmp[swizzle_x];           \<br>
-         if (DST_CHANS > 1) {                     \<br>
-            typed_dst[1] = tmp[swizzle_y];        \<br>
-            if (DST_CHANS > 2) {                  \<br>
-               typed_dst[2] = tmp[swizzle_z];     \<br>
-               if (DST_CHANS > 3) {               \<br>
-                  typed_dst[3] = tmp[swizzle_w];  \<br>
-               }                                  \<br>
-            }                                     \<br>
-         }                                        \<br>
-         typed_src += SRC_CHANS;                  \<br>
-         typed_dst += DST_CHANS;                  \<br>
-      }                                           \<br>
+   do {                                                  \<br>
+      bool fast_path = false;                            \<br>
+      if (DST_CHANS == 1 && swizzle_x < SRC_CHANS)       \<br>
+         fast_path = true;                               \<br>
+      if (DST_CHANS == 2 && swizzle_x < SRC_CHANS &&     \<br>
+          swizzle_y < SRC_CHANS)                         \<br>
+         fast_path = true;                               \<br>
+      if (DST_CHANS == 3 && swizzle_x < SRC_CHANS &&     \<br>
+          swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\<br>
+         fast_path = true;                               \<br>
+      if (DST_CHANS == 4 && swizzle_x < SRC_CHANS &&     \<br>
+          swizzle_y < SRC_CHANS &&                       \<br>
+          swizzle_z < SRC_CHANS &&                       \<br>
+          swizzle_w < SRC_CHANS)                         \<br>
+         fast_path = true;                               \<br>
+                                                         \<br>
+      /* The fast path avoids copying/converting srcs we \<br>
+       * will never use.                                 \<br>
+       */                                                \<br>
+      if (fast_path) {                                   \<br>
+         for (int s = 0; s < count; ++s) {               \<br>
+            SRC_TYPE src = typed_src[swizzle_x];         \<br>
+            tmp[swizzle_x] = CONV;                       \<br>
+            typed_dst[0] = tmp[swizzle_x];               \<br>
+            if (DST_CHANS > 1) {                         \<br>
+               SRC_TYPE src = typed_src[swizzle_y];      \<br>
+               tmp[swizzle_y] = CONV;                    \<br>
+               typed_dst[1] = tmp[swizzle_y];            \<br>
+               if (DST_CHANS > 2) {                      \<br>
+                  SRC_TYPE src = typed_src[swizzle_z];   \<br>
+                  tmp[swizzle_z] = CONV;                 \<br>
+                  typed_dst[2] = tmp[swizzle_z];         \<br>
+                  if (DST_CHANS > 3) {                   \<br>
+                     SRC_TYPE src = typed_src[swizzle_w];\<br>
+                     tmp[swizzle_w] = CONV;              \<br>
+                     typed_dst[3] = tmp[swizzle_w];      \<br>
+                  }                                      \<br>
+               }                                         \<br>
+            }                                            \<br>
+            typed_src += SRC_CHANS;                      \<br>
+            typed_dst += DST_CHANS;                      \<br>
+         }                                               \<br>
+      } else {                                           \<br>
+         for (int s = 0; s < count; ++s) {               \<br>
+            for (unsigned j = 0; j < SRC_CHANS; ++j) {   \<br>
+               SRC_TYPE src = typed_src[j];              \<br>
+               tmp[j] = CONV;                            \<br>
+            }                                            \<br>
+                                                         \<br>
+            typed_dst[0] = tmp[swizzle_x];               \<br>
+            if (DST_CHANS > 1) {                         \<br>
+               typed_dst[1] = tmp[swizzle_y];            \<br>
+               if (DST_CHANS > 2) {                      \<br>
+                  typed_dst[2] = tmp[swizzle_z];         \<br>
+                  if (DST_CHANS > 3) {                   \<br>
+                     typed_dst[3] = tmp[swizzle_w];      \<br>
+                  }                                      \<br>
+               }                                         \<br>
+            }                                            \<br>
+            typed_src += SRC_CHANS;                      \<br>
+            typed_dst += DST_CHANS;                      \<br>
+         }                                               \<br>
+      }                                                  \<br>
    } while (0)<br>
<br>
 /**<br>
  * Represents a single swizzle-and-convert operation<br>
  *<br>
  * This macro represents everything done in a single swizzle-and-convert<br>
  * operation.  The actual work is done by the SWIZZLE_CONVERT_LOOP macro.<br>
  * This macro acts as a wrapper that uses a nested switch to ensure that<br>
  * all looping parameters get unrolled.<br>
  *<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.9.4<br>
<br>
</font></span></blockquote></div><br></div></div>