[Pixman] [PATCH 05/10] ARM: performance tuning of NEON nearest scaled pixel fetcher
Siarhei Siamashka
siarhei.siamashka at gmail.com
Wed Nov 3 16:22:20 PDT 2010
From: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Interleaving the use of NEON registers helps to avoid some stalls
in the NEON pipeline and provides a small performance improvement.
---
pixman/pixman-arm-neon-asm.h | 33 +++++++++++++++++++++++++++------
1 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index d3b506d..c75bdc3 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -241,6 +241,30 @@
.endif
.endm
+.macro pixld2_s elem_size, reg1, reg2, mem_operand
+.if elem_size == 32                            /* 32-bit elements: fetch 4 pixels, alternating destination registers */
+    mov     TMP1, VX, asr #16                  /* TMP1 = index of pixel 0 (VX >> 16) */
+    add     VX, VX, UNIT_X, asl #1             /* VX jumps ahead by two steps */
+    add     TMP1, mem_operand, TMP1, asl #2    /* TMP1 = address of pixel 0 (4 bytes per pixel) */
+    mov     TMP2, VX, asr #16                  /* TMP2 = index of pixel 2 */
+    sub     VX, VX, UNIT_X                     /* VX steps back: one step past the start */
+    add     TMP2, mem_operand, TMP2, asl #2    /* TMP2 = address of pixel 2 */
+    vld1.32 {d&reg1&[0]}, [TMP1, :32]          /* first register, lane 0 = pixel 0 */
+    mov     TMP1, VX, asr #16                  /* TMP1 = index of pixel 1 */
+    add     VX, VX, UNIT_X, asl #1             /* VX is now three steps past the start */
+    add     TMP1, mem_operand, TMP1, asl #2    /* TMP1 = address of pixel 1 */
+    vld1.32 {d&reg2&[0]}, [TMP2, :32]          /* second register, lane 0 = pixel 2 */
+    mov     TMP2, VX, asr #16                  /* TMP2 = index of pixel 3 */
+    add     VX, VX, UNIT_X                     /* VX ends four steps past the start */
+    add     TMP2, mem_operand, TMP2, asl #2    /* TMP2 = address of pixel 3 */
+    vld1.32 {d&reg1&[1]}, [TMP1, :32]          /* first register, lane 1 = pixel 1 */
+    vld1.32 {d&reg2&[1]}, [TMP2, :32]          /* second register, lane 1 = pixel 3 */
+.else                                          /* other element sizes: one pixld1_s per register */
+    pixld1_s elem_size, reg1, mem_operand
+    pixld1_s elem_size, reg2, mem_operand
+.endif
+.endm
+
.macro pixld0_s elem_size, reg1, idx, mem_operand
.if elem_size == 16
mov TMP1, VX, asr #16
@@ -257,14 +281,11 @@
.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand
.if numbytes == 32
- pixld1_s elem_size, %(basereg+4), mem_operand
- pixld1_s elem_size, %(basereg+5), mem_operand
- pixld1_s elem_size, %(basereg+6), mem_operand
- pixld1_s elem_size, %(basereg+7), mem_operand
+ pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand
+ pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand
pixdeinterleave elem_size, %(basereg+4)
.elseif numbytes == 16
- pixld1_s elem_size, %(basereg+2), mem_operand
- pixld1_s elem_size, %(basereg+3), mem_operand
+ pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand
.elseif numbytes == 8
pixld1_s elem_size, %(basereg+1), mem_operand
.elseif numbytes == 4
--
1.7.2.2
More information about the Pixman
mailing list