[Pixman] [PATCH 2/2] vmx: there is no need to handle unaligned destination anymore

Siarhei Siamashka siarhei.siamashka at gmail.com
Fri Sep 27 19:30:15 PDT 2013


So the redundant variables, memory reads/writes and reshuffles
can be safely removed. For example, this makes the inner loop
of the 'vmx_combine_add_u_no_mask' function much simpler.

Before:

    7a20:7d a8 48 ce lvx     v13,r8,r9
    7a24:7d 80 48 ce lvx     v12,r0,r9
    7a28:7d 28 50 ce lvx     v9,r8,r10
    7a2c:7c 20 50 ce lvx     v1,r0,r10
    7a30:39 4a 00 10 addi    r10,r10,16
    7a34:10 0d 62 eb vperm   v0,v13,v12,v11
    7a38:10 21 4a 2b vperm   v1,v1,v9,v8
    7a3c:11 2c 6a eb vperm   v9,v12,v13,v11
    7a40:10 21 4a 00 vaddubs v1,v1,v9
    7a44:11 a1 02 ab vperm   v13,v1,v0,v10
    7a48:10 00 0a ab vperm   v0,v0,v1,v10
    7a4c:7d a8 49 ce stvx    v13,r8,r9
    7a50:7c 00 49 ce stvx    v0,r0,r9
    7a54:39 29 00 10 addi    r9,r9,16
    7a58:42 00 ff c8 bdnz+   7a20 <.vmx_combine_add_u_no_mask+0x120>

After:

    76c0:7c 00 48 ce lvx     v0,r0,r9
    76c4:7d a8 48 ce lvx     v13,r8,r9
    76c8:39 29 00 10 addi    r9,r9,16
    76cc:7c 20 50 ce lvx     v1,r0,r10
    76d0:10 00 6b 2b vperm   v0,v0,v13,v12
    76d4:10 00 0a 00 vaddubs v0,v0,v1
    76d8:7c 00 51 ce stvx    v0,r0,r10
    76dc:39 4a 00 10 addi    r10,r10,16
    76e0:42 00 ff e0 bdnz+   76c0 <.vmx_combine_add_u_no_mask+0x120>
---
 pixman/pixman-vmx.c | 117 ++++++++++++++++------------------------------------
 1 file changed, 36 insertions(+), 81 deletions(-)

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 130d78e..c33631c 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -134,15 +134,11 @@ over (vector unsigned int src,
     source ## _mask = vec_lvsl (0, source);
 
 #define COMPUTE_SHIFT_MASKS(dest, source)				\
-    dest ## _mask = vec_lvsl (0, dest);					\
-    source ## _mask = vec_lvsl (0, source);				\
-    store_mask = vec_lvsr (0, dest);
+    source ## _mask = vec_lvsl (0, source);
 
 #define COMPUTE_SHIFT_MASKC(dest, source, mask)				\
     mask ## _mask = vec_lvsl (0, mask);					\
-    dest ## _mask = vec_lvsl (0, dest);					\
-    source ## _mask = vec_lvsl (0, source);				\
-    store_mask = vec_lvsr (0, dest);
+    source ## _mask = vec_lvsl (0, source);
 
 /* notice you have to declare temp vars...
  * Note: tmp3 and tmp4 must remain untouched!
@@ -151,23 +147,17 @@ over (vector unsigned int src,
 #define LOAD_VECTORS(dest, source)			  \
     tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
     tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
-    tmp3 = (typeof(tmp3))vec_ld (0, dest);		  \
     v ## source = (typeof(v ## source))			  \
 	vec_perm (tmp1, tmp2, source ## _mask);		  \
-    tmp4 = (typeof(tmp4))vec_ld (15, dest);		  \
-    v ## dest = (typeof(v ## dest))			  \
-	vec_perm (tmp3, tmp4, dest ## _mask);
+    v ## dest = (typeof(v ## dest))vec_ld (0, dest);
 
 #define LOAD_VECTORSC(dest, source, mask)		  \
     tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
     tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
-    tmp3 = (typeof(tmp3))vec_ld (0, dest);		  \
     v ## source = (typeof(v ## source))			  \
 	vec_perm (tmp1, tmp2, source ## _mask);		  \
-    tmp4 = (typeof(tmp4))vec_ld (15, dest);		  \
     tmp1 = (typeof(tmp1))vec_ld (0, mask);		  \
-    v ## dest = (typeof(v ## dest))			  \
-	vec_perm (tmp3, tmp4, dest ## _mask);		  \
+    v ## dest = (typeof(v ## dest))vec_ld (0, dest);	  \
     tmp2 = (typeof(tmp2))vec_ld (15, mask);		  \
     v ## mask = (typeof(v ## mask))			  \
 	vec_perm (tmp1, tmp2, mask ## _mask);
@@ -178,11 +168,7 @@ over (vector unsigned int src,
                                 splat_alpha (v ## mask));
 
 #define STORE_VECTOR(dest)						\
-    edges = vec_perm (tmp4, tmp3, dest ## _mask);			\
-    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
-    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
-    vec_st ((vector unsigned int) tmp3, 15, dest);			\
-    vec_st ((vector unsigned int) tmp1, 0, dest);
+    vec_st ((vector unsigned int) v ## dest, 0, dest);
 
 static void
 vmx_combine_over_u_no_mask (uint32_t *      dest,
@@ -191,8 +177,7 @@ vmx_combine_over_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -242,8 +227,7 @@ vmx_combine_over_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -314,8 +298,7 @@ vmx_combine_over_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -363,8 +346,7 @@ vmx_combine_over_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -432,8 +414,7 @@ vmx_combine_in_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -478,8 +459,7 @@ vmx_combine_in_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -544,8 +524,7 @@ vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -592,8 +571,7 @@ vmx_combine_in_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -660,8 +638,7 @@ vmx_combine_out_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -708,8 +685,7 @@ vmx_combine_out_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -774,8 +750,7 @@ vmx_combine_out_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -823,8 +798,7 @@ vmx_combine_out_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -891,8 +865,7 @@ vmx_combine_atop_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -944,8 +917,7 @@ vmx_combine_atop_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1021,8 +993,7 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1074,8 +1045,7 @@ vmx_combine_atop_reverse_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1151,8 +1121,7 @@ vmx_combine_xor_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1204,8 +1173,7 @@ vmx_combine_xor_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1281,8 +1249,7 @@ vmx_combine_add_u_no_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1328,8 +1295,7 @@ vmx_combine_add_u_mask (uint32_t *      dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1397,8 +1363,7 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1448,8 +1413,7 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1507,8 +1471,7 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1564,8 +1527,7 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1619,8 +1581,7 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1675,8 +1636,7 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1733,8 +1693,7 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1791,8 +1750,7 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask, vsrca;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1858,8 +1816,7 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1922,8 +1879,7 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
@@ -1986,8 +1942,7 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-	dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
 
     while (width && ((uintptr_t)dest & 15))
     {
-- 
1.8.1.5



More information about the Pixman mailing list