[Mesa-dev] [PATCH 2/4] i965/tiled_memcpy: Move SSSE3 code back into inline functions.
Roland Scheidegger
sroland at vmware.com
Mon Apr 11 23:17:08 UTC 2016
Am 11.04.2016 um 21:05 schrieb Matt Turner:
> This will make adding SSE2 code a lot cleaner.
> ---
> src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 42 +++++++++++++++-----------
> 1 file changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
> index fa5ec75..5d58530 100644
> --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
> +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
> @@ -85,6 +85,22 @@ rgba8_copy(void *dst, const void *src, size_t bytes)
> #ifdef __SSSE3__
> static const uint8_t rgba8_permutation[16] =
> { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
> +
> +static inline void
> +rgba8_copy_16_aligned_dst(void *dst, const void *src)
> +{
> + _mm_store_si128(dst,
> + _mm_shuffle_epi8(_mm_loadu_si128(src),
> + *(__m128i *)rgba8_permutation));
> +}
> +
> +static inline void
> +rgba8_copy_16_aligned_src(void *dst, const void *src)
> +{
> + _mm_storeu_si128(dst,
> + _mm_shuffle_epi8(_mm_load_si128(src),
> + *(__m128i *)rgba8_permutation));
> +}
> #endif
>
> /**
> @@ -93,23 +109,18 @@ static const uint8_t rgba8_permutation[16] =
> static inline void *
> rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
> {
> - uint8_t *d = dst;
> - uint8_t const *s = src;
> -
> assert(bytes == 0 || !(((uintptr_t)dst) & 0xf));
>
> #ifdef __SSSE3__
> while (bytes >= 16) {
> - _mm_store_si128((__m128i *)d,
> - _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)s),
> - *(__m128i *) rgba8_permutation));
> - s += 16;
> - d += 16;
> + rgba8_copy_16_aligned_dst(dst, src);
> + src += 16;
> + dst += 16;
> bytes -= 16;
> }
> #endif
>
> - rgba8_copy(d, s, bytes);
> + rgba8_copy(dst, src, bytes);
>
> return dst;
> }
> @@ -120,23 +131,18 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
> static inline void *
> rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
> {
> - uint8_t *d = dst;
> - uint8_t const *s = src;
> -
> assert(bytes == 0 || !(((uintptr_t)src) & 0xf));
>
> #ifdef __SSSE3__
> while (bytes >= 16) {
> - _mm_storeu_si128((__m128i *)d,
> - _mm_shuffle_epi8(_mm_load_si128((__m128i *)s),
> - *(__m128i *) rgba8_permutation));
> - s += 16;
> - d += 16;
> + rgba8_copy_16_aligned_src(dst, src);
> + src += 16;
> + dst += 16;
> bytes -= 16;
> }
> #endif
>
> - rgba8_copy(d, s, bytes);
> + rgba8_copy(dst, src, bytes);
>
> return dst;
> }
>
I thought arithmetic on void pointers is illegal in standard C (it is only
accepted as a gcc extension), so maybe we should avoid that?
Roland
More information about the mesa-dev
mailing list