[Pixman] [PATCH v2 11/11] vmx: implement fast path iterator vmx_fetch_a8
Oded Gabbay
oded.gabbay at gmail.com
Wed Jul 15 06:36:35 PDT 2015
No changes were observed when running the cairo trimmed benchmarks.
Running "lowlevel-blt-bench src_8_8888" on POWER8, 8 cores,
3.4GHz, RHEL 7.1 ppc64le gave the following results:
reference memcpy speed = 25197.2MB/s (6299.3MP/s for 32bpp fills)
Before After Change
--------------------------------------------
L1 965.34 3936 +307.73%
L2 942.99 3436.29 +264.40%
M 902.24 2757.77 +205.66%
HT 448.46 784.99 +75.04%
VT 430.05 819.78 +90.62%
R 412.9 717.04 +73.66%
RT 168.93 220.63 +30.60%
Kops/s 1025 1303 +27.12%
It was benchmarked against commit id e2d211a from pixman/master.
Siarhei Siamashka reported that on a PlayStation 3 it shows the following
results:
== before ==
src_8_8888 = L1: 194.37 L2: 198.46 M:155.90 (148.35%)
HT: 59.18 VT: 36.71 R: 38.93 RT: 12.79 ( 106Kops/s)
== after ==
src_8_8888 = L1: 373.96 L2: 391.10 M:245.81 (233.88%)
HT: 80.81 VT: 44.33 R: 48.10 RT: 14.79 ( 122Kops/s)
Signed-off-by: Oded Gabbay <oded.gabbay at gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka at gmail.com>
---
pixman/pixman-vmx.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index ec39c81..3349657 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -3137,6 +3137,49 @@ vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
+static uint32_t *
+vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ uint32_t *dst = iter->buffer;
+ uint8_t *src = iter->bits;
+ vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;
+
+ iter->bits += iter->stride;
+
+ while (w && (((uintptr_t)dst) & 15))
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ vmx0 = load_128_unaligned((uint32_t *) src);
+
+ unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
+ unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
+ unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);
+
+ save_128_aligned(dst, vmx6);
+ save_128_aligned((dst + 4), vmx5);
+ save_128_aligned((dst + 8), vmx4);
+ save_128_aligned((dst + 12), vmx3);
+
+ dst += 16;
+ src += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ return iter->buffer;
+}
+
#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
@@ -3146,6 +3189,9 @@ static const pixman_iter_info_t vmx_iters[] =
{ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
_pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
},
+ { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
+ },
{ PIXMAN_null },
};
--
2.4.3
More information about the Pixman
mailing list