[Pixman] [test PATCH] Use _mm_maddubs_epi16 in BILINEAR_INTERPOLATE_ONE_PIXEL
Matt Turner
mattst88 at gmail.com
Sat Sep 29 00:12:16 PDT 2012
Siarhei, can you measure any performance improvement with this?
I can't... :(
---
pixman/pixman-sse2.c | 8 +++-----
1 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index efed310..4fbc045 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -32,6 +32,7 @@
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
+#include <tmmintrin.h> /* for SSSE3 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
@@ -5414,7 +5415,7 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
do { \
- __m128i xmm_wh, xmm_lo, xmm_hi, a; \
+ __m128i xmm_wh, a; \
/* fetch 2x2 pixel block into sse2 registers */ \
__m128i tltr = _mm_loadl_epi64 ( \
(__m128i *)&src_top[pixman_fixed_to_int (vx)]); \
@@ -5443,10 +5444,7 @@ do { \
_mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
- xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
- xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
- a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
- _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
+ a = _mm_maddubs_epi16 (a, xmm_wh); \
} \
/* shift and pack the result */ \
a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \
--
1.7.8.6
More information about the Pixman mailing list