[Pixman] [PATCH 1/1] vmx: workarounds to fix powerpc little endian particularities

Fernando Seiti Furusato ferseiti at linux.vnet.ibm.com
Thu May 28 07:22:48 PDT 2015


I have made some changes to the file pixman-vmx.c, which uses vmx (aka altivec)
to optimize pixman. Basically, what I did:
 Changed the usage of vec_perm, vec_mergeh and vec_mergel. They were giving
weird results when running on little endian. That was because the integer
vectors were being cast to char, which made them be ordered and permuted byte
by byte.
 Replaced usage of vec_lvsl with a direct unaligned assignment operation (=),
because, according to the Power ABI Specification, the use of lvsl is
deprecated on ppc64le.
 Changed the COMPUTE_SHIFT_{MASK,MASKS,MASKC} macros to no-ops for powerpc
little endian, since unaligned access is supported on ppc64le.
After those changes, all tests passed on ppc64le; tests on ppc64 and powerpc
produced the same results as before the changes.

Signed-off-by: Fernando Seiti Furusato <ferseiti at linux.vnet.ibm.com>
---
 pixman/pixman-vmx.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 88 insertions(+), 9 deletions(-)

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index c33631c..5b6e98c 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -34,6 +34,7 @@
 
 #define AVV(x...) {x}
 
+#ifdef WORDS_BIGENDIAN
 static force_inline vector unsigned int
 splat_alpha (vector unsigned int pix)
 {
@@ -84,6 +85,59 @@ pix_multiply (vector unsigned int p, vector unsigned int a)
     return (vector unsigned int)vec_packsu (hi, lo);
 }
 
+#else //ifdef WORDS_BIGENDIAN
+
+static force_inline vector unsigned int
+splat_alpha (vector unsigned int pix)
+{
+    return vec_perm (pix, pix,
+		     (vector unsigned char)AVV (
+			 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07,
+			 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F));
+}
+
+static force_inline vector unsigned int
+pix_multiply (vector unsigned int p, vector unsigned int a)
+{
+    vector unsigned short hi, lo, mod;
+
+    /* unpack to short */
+    hi = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)p,
+		    (vector unsigned char)AVV (0));
+
+    mod = (vector unsigned short)
+	vec_mergeh ((vector unsigned char)a,
+		    (vector unsigned char)AVV (0));
+
+    hi = vec_mladd (hi, mod, (vector unsigned short)
+                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
+                         0x0080, 0x0080, 0x0080, 0x0080));
+
+    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
+
+    hi = vec_sr (hi, vec_splat_u16 (8));
+
+    /* unpack to short */
+    lo = (vector unsigned short)
+	vec_mergel ((vector unsigned char)p,
+		    (vector unsigned char)AVV (0));
+    mod = (vector unsigned short)
+	vec_mergel ((vector unsigned char)a,
+		    (vector unsigned char)AVV (0));
+
+    lo = vec_mladd (lo, mod, (vector unsigned short)
+                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
+                         0x0080, 0x0080, 0x0080, 0x0080));
+
+    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
+
+    lo = vec_sr (lo, vec_splat_u16 (8));
+
+    return (vector unsigned int)vec_packsu (hi, lo);
+}
+#endif //WORDS_BIGENDIAN
+
 static force_inline vector unsigned int
 pix_add (vector unsigned int a, vector unsigned int b)
 {
@@ -129,29 +183,26 @@ over (vector unsigned int src,
     over (pix_multiply (src, mask),					\
           pix_multiply (srca, mask), dest)
 
+#ifdef WORDS_BIGENDIAN
 
-#define COMPUTE_SHIFT_MASK(source)					\
+# define COMPUTE_SHIFT_MASK(source)					\
     source ## _mask = vec_lvsl (0, source);
 
-#define COMPUTE_SHIFT_MASKS(dest, source)				\
+# define COMPUTE_SHIFT_MASKS(dest, source)				\
     source ## _mask = vec_lvsl (0, source);
 
-#define COMPUTE_SHIFT_MASKC(dest, source, mask)				\
+# define COMPUTE_SHIFT_MASKC(dest, source, mask)			\
     mask ## _mask = vec_lvsl (0, mask);					\
     source ## _mask = vec_lvsl (0, source);
 
-/* notice you have to declare temp vars...
- * Note: tmp3 and tmp4 must remain untouched!
- */
-
-#define LOAD_VECTORS(dest, source)			  \
+# define LOAD_VECTORS(dest, source)			  \
     tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
     tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
     v ## source = (typeof(v ## source))			  \
 	vec_perm (tmp1, tmp2, source ## _mask);		  \
     v ## dest = (typeof(v ## dest))vec_ld (0, dest);
 
-#define LOAD_VECTORSC(dest, source, mask)		  \
+# define LOAD_VECTORSC(dest, source, mask)		  \
     tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
     tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
     v ## source = (typeof(v ## source))			  \
@@ -162,6 +213,34 @@ over (vector unsigned int src,
     v ## mask = (typeof(v ## mask))			  \
 	vec_perm (tmp1, tmp2, mask ## _mask);
 
+#else //WORDS_BIGENDIAN
+
+/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-op.
+ * They are defined that way because little endian altivec can do unaligned
+ * reads natively and have no need for constructing the permutation pattern
+ * variables.
+ */
+# define COMPUTE_SHIFT_MASK(source)
+
+# define COMPUTE_SHIFT_MASKS(dest, source)
+
+# define COMPUTE_SHIFT_MASKC(dest, source, mask)
+
+# define LOAD_VECTORS(dest, source)                        \
+    v ## source = *((typeof(v ## source)*)source);        \
+    v ## dest = *((typeof(v ## dest)*)dest);
+
+# define LOAD_VECTORSC(dest, source, mask)                 \
+    v ## source = *((typeof(v ## source)*)source);        \
+    v ## dest = *((typeof(v ## dest)*)dest);              \
+    v ## mask = *((typeof(v ## mask)*)mask);
+
+#endif //WORDS_BIGENDIAN
+
+/* notice you have to declare temp vars...
+ * Note: tmp3 and tmp4 must remain untouched!
+ */
+
 #define LOAD_VECTORSM(dest, source, mask)				\
     LOAD_VECTORSC (dest, source, mask)					\
     v ## source = pix_multiply (v ## source,				\
-- 
2.1.4



More information about the Pixman mailing list