<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Wed, Jun 14, 2017 at 10:26 PM, Timothy Arceri <span dir="ltr"><<a href="mailto:tarceri@itsqueeze.com" target="_blank">tarceri@itsqueeze.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">If all the swizzles are inside the src channels range then we can just<br>
grab the srcs we need rather than converting everything.<br>
<br>
perf report shows convert_float() going from ~10% -> ~7% when<br>
running the following glean test:<br>
<br>
glean -o -v -v -v -t +pointAtten<br>
<br>
Cc: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br>
---<br>
<br>
Hi Jason,<br>
<br>
I've only perf tested the above glean test. What did you use to benchmark<br>
this when you wrote it?<br></blockquote><div><br></div><div>The teximage-colors test has a benchmark flag which I added at the time. I trust that a lot more than some random glean test. :-)<br><br></div><div>--Jason<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Thanks,<br>
Tim<br>
<br>
src/mesa/main/format_utils.c | 84 ++++++++++++++++++++++++++++++<wbr>+++-----------<br>
1 file changed, 63 insertions(+), 21 deletions(-)<br>
<br>
diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c<br>
index 65e65d4..1649ac0 100644<br>
--- a/src/mesa/main/format_utils.c<br>
+++ b/src/mesa/main/format_utils.c<br>
@@ -799,41 +799,83 @@ swizzle_convert_try_memcpy(<wbr>void *dst,<br>
*<br>
* \param DST_TYPE the C datatype of the destination<br>
* \param DST_CHANS the number of destination channels<br>
* \param SRC_TYPE the C datatype of the source<br>
* \param SRC_CHANS the number of source channels<br>
* \param CONV an expression for converting from the source data,<br>
* storred in the variable "src", to the destination<br>
* format<br>
*/<br>
#define SWIZZLE_CONVERT_LOOP(DST_TYPE, DST_CHANS, SRC_TYPE, SRC_CHANS, CONV) \<br>
- do { \<br>
- int s, j; \<br>
- for (s = 0; s < count; ++s) { \<br>
- for (j = 0; j < SRC_CHANS; ++j) { \<br>
- SRC_TYPE src = typed_src[j]; \<br>
- tmp[j] = CONV; \<br>
- } \<br>
- \<br>
- typed_dst[0] = tmp[swizzle_x]; \<br>
- if (DST_CHANS > 1) { \<br>
- typed_dst[1] = tmp[swizzle_y]; \<br>
- if (DST_CHANS > 2) { \<br>
- typed_dst[2] = tmp[swizzle_z]; \<br>
- if (DST_CHANS > 3) { \<br>
- typed_dst[3] = tmp[swizzle_w]; \<br>
- } \<br>
- } \<br>
- } \<br>
- typed_src += SRC_CHANS; \<br>
- typed_dst += DST_CHANS; \<br>
- } \<br>
+ do { \<br>
+ bool fast_path = false; \<br>
+ if (DST_CHANS == 1 && swizzle_x < SRC_CHANS) \<br>
+ fast_path = true; \<br>
+ if (DST_CHANS == 2 && swizzle_x < SRC_CHANS && \<br>
+ swizzle_y < SRC_CHANS) \<br>
+ fast_path = true; \<br>
+ if (DST_CHANS == 3 && swizzle_x < SRC_CHANS && \<br>
+ swizzle_y < SRC_CHANS && swizzle_z < SRC_CHANS)\<br>
+ fast_path = true; \<br>
+ if (DST_CHANS == 4 && swizzle_x < SRC_CHANS && \<br>
+ swizzle_y < SRC_CHANS && \<br>
+ swizzle_z < SRC_CHANS && \<br>
+ swizzle_w < SRC_CHANS) \<br>
+ fast_path = true; \<br>
+ \<br>
+ /* The fast path avoids copying/converting srcs we \<br>
+ * will never use. \<br>
+ */ \<br>
+ if (fast_path) { \<br>
+ for (int s = 0; s < count; ++s) { \<br>
+ SRC_TYPE src = typed_src[swizzle_x]; \<br>
+ tmp[swizzle_x] = CONV; \<br>
+ typed_dst[0] = tmp[swizzle_x]; \<br>
+ if (DST_CHANS > 1) { \<br>
+ SRC_TYPE src = typed_src[swizzle_y]; \<br>
+ tmp[swizzle_y] = CONV; \<br>
+ typed_dst[1] = tmp[swizzle_y]; \<br>
+ if (DST_CHANS > 2) { \<br>
+ SRC_TYPE src = typed_src[swizzle_z]; \<br>
+ tmp[swizzle_z] = CONV; \<br>
+ typed_dst[2] = tmp[swizzle_z]; \<br>
+ if (DST_CHANS > 3) { \<br>
+ SRC_TYPE src = typed_src[swizzle_w];\<br>
+ tmp[swizzle_w] = CONV; \<br>
+ typed_dst[3] = tmp[swizzle_w]; \<br>
+ } \<br>
+ } \<br>
+ } \<br>
+ typed_src += SRC_CHANS; \<br>
+ typed_dst += DST_CHANS; \<br>
+ } \<br>
+ } else { \<br>
+ for (int s = 0; s < count; ++s) { \<br>
+ for (unsigned j = 0; j < SRC_CHANS; ++j) { \<br>
+ SRC_TYPE src = typed_src[j]; \<br>
+ tmp[j] = CONV; \<br>
+ } \<br>
+ \<br>
+ typed_dst[0] = tmp[swizzle_x]; \<br>
+ if (DST_CHANS > 1) { \<br>
+ typed_dst[1] = tmp[swizzle_y]; \<br>
+ if (DST_CHANS > 2) { \<br>
+ typed_dst[2] = tmp[swizzle_z]; \<br>
+ if (DST_CHANS > 3) { \<br>
+ typed_dst[3] = tmp[swizzle_w]; \<br>
+ } \<br>
+ } \<br>
+ } \<br>
+ typed_src += SRC_CHANS; \<br>
+ typed_dst += DST_CHANS; \<br>
+ } \<br>
+ } \<br>
} while (0)<br>
<br>
/**<br>
* Represents a single swizzle-and-convert operation<br>
*<br>
* This macro represents everything done in a single swizzle-and-convert<br>
* operation. The actual work is done by the SWIZZLE_CONVERT_LOOP macro.<br>
* This macro acts as a wrapper that uses a nested switch to ensure that<br>
* all looping parameters get unrolled.<br>
*<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.9.4<br>
<br>
</font></span></blockquote></div><br></div></div>