[Mesa-dev] [PATCH 2/4] i965/tiled_memcpy: Move SSSE3 code back into inline functions.
Matt Turner
mattst88 at gmail.com
Mon Apr 11 19:05:21 UTC 2016
This will make adding SSE2 code a lot cleaner.
---
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 42 +++++++++++++++-----------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index fa5ec75..5d58530 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -85,6 +85,22 @@ rgba8_copy(void *dst, const void *src, size_t bytes)
#ifdef __SSSE3__
static const uint8_t rgba8_permutation[16] =
{ 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
+
+static inline void
+rgba8_copy_16_aligned_dst(void *dst, const void *src)
+{
+ _mm_store_si128(dst,
+ _mm_shuffle_epi8(_mm_loadu_si128(src),
+ *(__m128i *)rgba8_permutation));
+}
+
+static inline void
+rgba8_copy_16_aligned_src(void *dst, const void *src)
+{
+ _mm_storeu_si128(dst,
+ _mm_shuffle_epi8(_mm_load_si128(src),
+ *(__m128i *)rgba8_permutation));
+}
#endif
/**
@@ -93,23 +109,18 @@ static const uint8_t rgba8_permutation[16] =
static inline void *
rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
{
- uint8_t *d = dst;
- uint8_t const *s = src;
-
assert(bytes == 0 || !(((uintptr_t)dst) & 0xf));
#ifdef __SSSE3__
while (bytes >= 16) {
- _mm_store_si128((__m128i *)d,
- _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)s),
- *(__m128i *) rgba8_permutation));
- s += 16;
- d += 16;
+ rgba8_copy_16_aligned_dst(dst, src);
+ src += 16;
+ dst += 16;
bytes -= 16;
}
#endif
- rgba8_copy(d, s, bytes);
+ rgba8_copy(dst, src, bytes);
return dst;
}
@@ -120,23 +131,18 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
static inline void *
rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
{
- uint8_t *d = dst;
- uint8_t const *s = src;
-
assert(bytes == 0 || !(((uintptr_t)src) & 0xf));
#ifdef __SSSE3__
while (bytes >= 16) {
- _mm_storeu_si128((__m128i *)d,
- _mm_shuffle_epi8(_mm_load_si128((__m128i *)s),
- *(__m128i *) rgba8_permutation));
- s += 16;
- d += 16;
+ rgba8_copy_16_aligned_src(dst, src);
+ src += 16;
+ dst += 16;
bytes -= 16;
}
#endif
- rgba8_copy(d, s, bytes);
+ rgba8_copy(dst, src, bytes);
return dst;
}
--
2.7.3
More information about the mesa-dev mailing list