[Pixman] [RFC] mmx: add and use expand_4xpacked565
Matt Turner
mattst88 at gmail.com
Thu May 17 13:31:02 PDT 2012
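Given a pixel with only the red component set to one of these values, the
results of expand_4xpacked565 are off by one compared to expand565. Each line
below shows: input -> new result (expand565 result).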
0x03 -> 0x19 (0x18)
0x07 -> 0x3A (0x39)
0x18 -> 0xC5 (0xC6)
0x1C -> 0xE6 (0xE7)
(The same holds for blue; green has many more such cases.)
expand_4xpacked565 uses
R8 = ( R5 * 527 + 23 ) >> 6;
G8 = ( G6 * 259 + 33 ) >> 6;
B8 = ( B5 * 527 + 23 ) >> 6;
I don't suppose there's a way to tweak this to produce the same results
we get from expand565, is there?
---
pixman/pixman-mmx.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 52 insertions(+), 6 deletions(-)
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 01a2bc9..74a2ad8 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -185,6 +185,13 @@ typedef struct
mmxdatafield mmx_565_b;
mmxdatafield mmx_packed_565_rb;
mmxdatafield mmx_packed_565_g;
+ mmxdatafield mmx_expand_565_r;
+ mmxdatafield mmx_expand_565_g;
+ mmxdatafield mmx_expand_565_b;
+ mmxdatafield mmx_mul_adjust_565_rb;
+ mmxdatafield mmx_mul_adjust_565_g;
+ mmxdatafield mmx_add_adjust_565_rb;
+ mmxdatafield mmx_add_adjust_565_g;
#ifndef USE_LOONGSON_MMI
mmxdatafield mmx_mask_0;
mmxdatafield mmx_mask_1;
@@ -216,6 +223,13 @@ static const mmx_data_t c =
MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8),
MMXDATA_INIT (.mmx_packed_565_rb, 0x00f800f800f800f8),
MMXDATA_INIT (.mmx_packed_565_g, 0x0000fc000000fc00),
+ MMXDATA_INIT (.mmx_expand_565_r, 0xf800f800f800f800),
+ MMXDATA_INIT (.mmx_expand_565_g, 0x07e007e007e007e0),
+ MMXDATA_INIT (.mmx_expand_565_b, 0x001f001f001f001f),
+ MMXDATA_INIT (.mmx_mul_adjust_565_rb, 0x020f020f020f020f),
+ MMXDATA_INIT (.mmx_mul_adjust_565_g, 0x0103010301030103),
+ MMXDATA_INIT (.mmx_add_adjust_565_rb, 0x0017001700170017),
+ MMXDATA_INIT (.mmx_add_adjust_565_g, 0x0021002100210021),
#ifndef USE_LOONGSON_MMI
MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000),
MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff),
@@ -518,6 +532,40 @@ expand565 (__m64 pixel, int pos)
return _mm_srli_pi16 (pixel, 8);
}
+void
+expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1)
+{
+ __m64 r = _mm_and_si64 (vin, MC (expand_565_r));
+ __m64 g = _mm_and_si64 (vin, MC (expand_565_g));
+ __m64 b = _mm_and_si64 (vin, MC (expand_565_b));
+
+ r = shift (r, -8 - 3);
+ g = shift (g, -3 - 2);
+
+ r = _mm_mullo_pi16 (r, MC (mul_adjust_565_rb));
+ g = _mm_mullo_pi16 (g, MC (mul_adjust_565_g));
+ b = _mm_mullo_pi16 (b, MC (mul_adjust_565_rb));
+
+ r = _mm_adds_pu16 (r, MC (add_adjust_565_rb));
+ g = _mm_adds_pu16 (g, MC (add_adjust_565_g));
+ b = _mm_adds_pu16 (b, MC (add_adjust_565_rb));
+
+ r = _mm_srli_pi16 (r, 6);
+ g = _mm_srli_pi16 (g, 6);
+ b = _mm_srli_pi16 (b, 6);
+
+ r = _mm_packs_pu16 (r, _mm_setzero_si64 ());
+ g = _mm_packs_pu16 (g, _mm_setzero_si64 ());
+ b = _mm_packs_pu16 (b, _mm_setzero_si64 ());
+
+ __m64 t0 = _mm_unpacklo_pi8 (b, g);
+ __m64 t1 = _mm_unpacklo_pi8 (r, _mm_cmpeq_pi32 (_mm_setzero_si64 (),
+ _mm_setzero_si64 ()));
+
+ *vout0 = _mm_unpacklo_pi16 (t0, t1);
+ *vout1 = _mm_unpackhi_pi16 (t0, t1);
+}
+
static force_inline __m64
expand8888 (__m64 in, int pos)
{
@@ -3341,14 +3389,12 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
while (w >= 4)
{
__m64 vsrc = ldq_u ((__m64 *)src);
+ __m64 mm0, mm1;
- __m64 mm0 = expand565 (vsrc, 0);
- __m64 mm1 = expand565 (vsrc, 1);
- __m64 mm2 = expand565 (vsrc, 2);
- __m64 mm3 = expand565 (vsrc, 3);
+ expand_4xpacked565 (vsrc, &mm0, &mm1);
- *(__m64 *)(dst + 0) = _mm_or_si64 (pack8888 (mm0, mm1), MC (ff000000));
- *(__m64 *)(dst + 2) = _mm_or_si64 (pack8888 (mm2, mm3), MC (ff000000));
+ *(__m64 *)(dst + 0) = mm0;
+ *(__m64 *)(dst + 2) = mm1;
dst += 4;
src += 4;
--
1.7.3.4
More information about the Pixman
mailing list