[Pixman] [PATCH 1/1 v2] vmx: workarounds to fix powerpc little endian particularities
Fernando Seiti Furusato
ferseiti at linux.vnet.ibm.com
Mon Jun 1 13:46:00 PDT 2015
I have made some changes to the file pixman-vmx.c, which uses vmx (aka altivec)
to optimize pixman. Basically, what I did:
Changed the vec_perm in splat_alpha so that an XOR is applied to the byte
positions, making it work regardless of endianness.
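To illustrate the trick outside of AltiVec, here is a minimal scalar sketch
(just an illustration, not part of the patch): the union reveals the byte
order at run time, and XOR-ing a byte index with 3 keeps it pointing at the
alpha byte of a 0xAARRGGBB pixel on either endianness.

#include <stdio.h>

int main (void)
{
    /* c[1] is 3 on little endian and 0 on big endian, as in splat_alpha */
    union { unsigned short s; unsigned char c[2]; } endian_xor = { 0x0300 };
    unsigned int pix = 0x80102030;                  /* alpha = 0x80 */
    unsigned char *bytes = (unsigned char *) &pix;

    /* index 0 holds the alpha on big endian; 0 ^ 3 = 3 selects it on LE */
    unsigned char alpha = bytes[0 ^ endian_xor.c[1]];

    printf ("alpha = 0x%02x\n", alpha);             /* 0x80 either way */
    return 0;
}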
Replaced the use of vec_mergeh, vec_mergel and vec_mladd with vec_mule and
vec_mulo plus vec_add and vec_perm. The result is the same, but it is no
longer affected by endianness differences.
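For reference, both the old mergeh/mergel + mladd sequence and the new
mule/mulo version compute the same per-byte value; only the way the bytes are
split into 16-bit lanes differs. A scalar sketch of that arithmetic
(illustrative only, not patch code):

/* Rounded multiply of two 8-bit channels, approximating (p * a) / 255.
 * This is what pix_multiply does per byte before and after the change. */
static unsigned char
mul_div_255 (unsigned char p, unsigned char a)
{
    unsigned int t = (unsigned int) p * a + 0x80;   /* bias, the 0x0080 vector */
    return (unsigned char) ((t + (t >> 8)) >> 8);   /* adds + two shifts by 8 */
}

For example, mul_div_255 (0xff, 0x80) yields 0x80, as expected.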
Replaced the use of vec_lvsl with direct unaligned assignment (=), because,
according to the Power ABI specification, lvsl is deprecated on ppc64le.
Made the COMPUTE_SHIFT_{MASK,MASKS,MASKC} macros no-ops on little-endian
PowerPC, since ppc64le supports unaligned access natively.
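For clarity, this is roughly what the two LOAD_VECTORS flavours boil down to
for a single pointer (a sketch only; load_unaligned is a made-up name, and it
assumes <altivec.h> plus pixman's WORDS_BIGENDIAN from config.h):

#include <altivec.h>

static vector unsigned int
load_unaligned (const unsigned int *src)
{
#ifdef WORDS_BIGENDIAN
    /* big endian: merge two aligned loads with a vec_lvsl permute mask */
    vector unsigned char shift = vec_lvsl (0, src);
    vector unsigned int  lo    = vec_ld (0, src);
    vector unsigned int  hi    = vec_ld (15, src);
    return vec_perm (lo, hi, shift);
#else
    /* ppc64le: the hardware handles unaligned vector accesses directly */
    return *(const vector unsigned int *) src;
#endif
}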
After these changes, all tests passed on ppc64, ppc64le and powerpc VMs.
Signed-off-by: Fernando Seiti Furusato <ferseiti at linux.vnet.ibm.com>
---
pixman/pixman-vmx.c | 106 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 67 insertions(+), 39 deletions(-)
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index c33631c..57918c1 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -37,46 +37,49 @@
static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
- return vec_perm (pix, pix,
- (vector unsigned char)AVV (
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
- 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
+ union {
+ unsigned short s;
+ unsigned char c[2];
+ } endian_xor = {0x0300};
+
+ /* endian_xor.c[1] will be 3 if little endian and 0 if big endian */
+ vector unsigned char perm = vec_splat((vector unsigned char)
+ AVV (endian_xor.c[1]),0);
+ perm = vec_xor (perm,(vector unsigned char) AVV (
+ 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
+ 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
+ return vec_perm (pix, pix, perm);
}
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
- vector unsigned short hi, lo, mod;
-
- /* unpack to short */
- hi = (vector unsigned short)
- vec_mergeh ((vector unsigned char)AVV (0),
- (vector unsigned char)p);
-
- mod = (vector unsigned short)
- vec_mergeh ((vector unsigned char)AVV (0),
- (vector unsigned char)a);
-
- hi = vec_mladd (hi, mod, (vector unsigned short)
- AVV (0x0080, 0x0080, 0x0080, 0x0080,
- 0x0080, 0x0080, 0x0080, 0x0080));
+ vector unsigned short hi, lo, even, odd;
+
+ /* unpack to short while multiplying p and a even positions */
+ even = vec_mule((vector unsigned char)p, (vector unsigned char)a);
+ even = vec_add(even, (vector unsigned short)AVV
+ (0x0080, 0x0080, 0x0080, 0x0080,
+ 0x0080, 0x0080, 0x0080, 0x0080));
+
+ /* unpack to short while multiplying p and a odd positions */
+ odd = vec_mulo ((vector unsigned char)p, (vector unsigned char)a);
+ odd = vec_add (odd, (vector unsigned short)AVV
+ (0x0080, 0x0080, 0x0080, 0x0080,
+ 0x0080, 0x0080, 0x0080, 0x0080));
+
+ /* change split from even and odd positions to high and low ends */
+ hi = vec_perm (even, odd, (vector unsigned char)AVV
+ (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
+ 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+ lo = vec_perm (even, odd, (vector unsigned char)AVV
+ (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
hi = vec_sr (hi, vec_splat_u16 (8));
- /* unpack to short */
- lo = (vector unsigned short)
- vec_mergel ((vector unsigned char)AVV (0),
- (vector unsigned char)p);
- mod = (vector unsigned short)
- vec_mergel ((vector unsigned char)AVV (0),
- (vector unsigned char)a);
-
- lo = vec_mladd (lo, mod, (vector unsigned short)
- AVV (0x0080, 0x0080, 0x0080, 0x0080,
- 0x0080, 0x0080, 0x0080, 0x0080));
-
lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
lo = vec_sr (lo, vec_splat_u16 (8));
@@ -129,29 +132,26 @@ over (vector unsigned int src,
over (pix_multiply (src, mask), \
pix_multiply (srca, mask), dest)
+#ifdef WORDS_BIGENDIAN
-#define COMPUTE_SHIFT_MASK(source) \
+# define COMPUTE_SHIFT_MASK(source) \
source ## _mask = vec_lvsl (0, source);
-#define COMPUTE_SHIFT_MASKS(dest, source) \
+# define COMPUTE_SHIFT_MASKS(dest, source) \
source ## _mask = vec_lvsl (0, source);
-#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
+# define COMPUTE_SHIFT_MASKC(dest, source, mask) \
mask ## _mask = vec_lvsl (0, mask); \
source ## _mask = vec_lvsl (0, source);
-/* notice you have to declare temp vars...
- * Note: tmp3 and tmp4 must remain untouched!
- */
-
-#define LOAD_VECTORS(dest, source) \
+# define LOAD_VECTORS(dest, source) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
v ## dest = (typeof(v ## dest))vec_ld (0, dest);
-#define LOAD_VECTORSC(dest, source, mask) \
+# define LOAD_VECTORSC(dest, source, mask) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
v ## source = (typeof(v ## source)) \
@@ -162,6 +162,34 @@ over (vector unsigned int src,
v ## mask = (typeof(v ## mask)) \
vec_perm (tmp1, tmp2, mask ## _mask);
+#else //WORDS_BIGENDIAN
+
+/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-op.
+ * They are defined that way because little endian altivec can do unaligned
+ * reads natively and have no need for constructing the permutation pattern
+ * variables.
+ */
+# define COMPUTE_SHIFT_MASK(source)
+
+# define COMPUTE_SHIFT_MASKS(dest, source)
+
+# define COMPUTE_SHIFT_MASKC(dest, source, mask)
+
+# define LOAD_VECTORS(dest, source) \
+ v ## source = *((typeof(v ## source)*)source); \
+ v ## dest = *((typeof(v ## dest)*)dest);
+
+# define LOAD_VECTORSC(dest, source, mask) \
+ v ## source = *((typeof(v ## source)*)source); \
+ v ## dest = *((typeof(v ## dest)*)dest); \
+ v ## mask = *((typeof(v ## mask)*)mask);
+
+#endif //WORDS_BIGENDIAN
+
+/* notice you have to declare temp vars...
+ * Note: tmp3 and tmp4 must remain untouched!
+ */
+
#define LOAD_VECTORSM(dest, source, mask) \
LOAD_VECTORSC (dest, source, mask) \
v ## source = pix_multiply (v ## source, \
--
2.1.4