[Pixman] [PATCH] ARM: NEON: Use AArch64 compatible syntax for barrel shifter arguments
Siarhei Siamashka
siarhei.siamashka at gmail.com
Sat Apr 2 00:40:01 UTC 2016
The following 32-bit ARM code

    mov TMP1, X, asr #16
    add TMP1, TOP, TMP1, asl #2

needs to be converted into

    asr TMP1, X, #16
    add TMP1, TOP, TMP1, lsl #2

in order to be accepted by both 32-bit and 64-bit ARM assemblers: the
A64 syntax has no "asl" shift mnemonic and its MOV alias does not take
a barrel-shifted operand, while the "asr" and "lsl" spellings above are
valid for both instruction sets.
The conversion has been done using the following Ruby script:

    File.open(ARGV[0]).each_line do |l|
        # Replace "mov X, Y, asr #" -> "asr X, Y, #"
        l.gsub!(/mov(\s+[^,]+,[^,]+,)\s*asr\s*\#/, 'asr\1 #')
        # Replace "add X, Y, Z, asl #" -> "add X, Y, Z, lsl #"
        l.gsub!(/(add\s+[^,]+,[^,]+,[^,]+,\s*)asl(\s+\#)/, '\1lsl\2')
        puts l
    end
The source files are assembled into exactly the same object files
after this change.
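
One possible way to double-check that claim, assuming the object files
built from the old and new sources were saved under old/ and new/
(directory layout is illustrative):

    # Compare each pair of object files byte for byte via SHA-256.
    require 'digest'
    %w[pixman-arm-neon-asm-bilinear.o pixman-arm-neon-asm.o].each do |obj|
      old_sum = Digest::SHA256.file(File.join('old', obj)).hexdigest
      new_sum = Digest::SHA256.file(File.join('new', obj)).hexdigest
      puts "#{obj}: #{old_sum == new_sum ? 'identical' : 'DIFFERS'}"
    end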
Signed-off-by: Siarhei Siamashka <siarhei.siamashka at gmail.com>
---
pixman/pixman-arm-neon-asm-bilinear.S | 98 ++++++++++++-------------
pixman/pixman-arm-neon-asm.S | 132 +++++++++++++++++-----------------
pixman/pixman-arm-neon-asm.h | 52 +++++++-------
3 files changed, 141 insertions(+), 141 deletions(-)
diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S
index 0fd92d6..a7d94c3 100644
--- a/pixman/pixman-arm-neon-asm-bilinear.S
+++ b/pixman/pixman-arm-neon-asm-bilinear.S
@@ -79,17 +79,17 @@
*/
.macro bilinear_load_8888 reg1, reg2, tmp
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
+ add TMP1, TOP, TMP1, lsl #2
vld1.32 {reg1}, [TMP1], STRIDE
vld1.32 {reg2}, [TMP1]
.endm
.macro bilinear_load_0565 reg1, reg2, tmp
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
+ add TMP1, TOP, TMP1, lsl #1
vld1.32 {reg2[0]}, [TMP1], STRIDE
vld1.32 {reg2[1]}, [TMP1]
convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
@@ -119,12 +119,12 @@
.macro bilinear_load_and_vertical_interpolate_two_0565 \
acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #1
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #1
+ add TMP2, TOP, TMP2, lsl #1
vld1.32 {acc2lo[0]}, [TMP1], STRIDE
vld1.32 {acc2hi[0]}, [TMP2], STRIDE
vld1.32 {acc2lo[1]}, [TMP1]
@@ -144,23 +144,23 @@
xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #1
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #1
+ add TMP2, TOP, TMP2, lsl #1
vld1.32 {xacc2lo[0]}, [TMP1], STRIDE
vld1.32 {xacc2hi[0]}, [TMP2], STRIDE
vld1.32 {xacc2lo[1]}, [TMP1]
vld1.32 {xacc2hi[1]}, [TMP2]
convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #1
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #1
+ add TMP2, TOP, TMP2, lsl #1
vld1.32 {yacc2lo[0]}, [TMP1], STRIDE
vzip.u8 xreg1, xreg3
vld1.32 {yacc2hi[0]}, [TMP2], STRIDE
@@ -712,7 +712,7 @@ pixman_asm_function fname
.endif
subs WIDTH, WIDTH, #pixblock_size
blt 1f
- mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+ asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
bilinear_process_pixblock_head
subs WIDTH, WIDTH, #pixblock_size
blt 5f
@@ -884,26 +884,26 @@ pixman_asm_function fname
.endm
.macro bilinear_over_8888_8888_process_pixblock_head
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #2
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vld1.32 {d22}, [TMP1], STRIDE
vld1.32 {d23}, [TMP1]
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
+ add TMP3, TOP, TMP3, lsl #2
vmull.u8 q8, d22, d28
vmlal.u8 q8, d23, d29
vld1.32 {d22}, [TMP2], STRIDE
vld1.32 {d23}, [TMP2]
- mov TMP4, X, asr #16
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmull.u8 q9, d22, d28
vmlal.u8 q9, d23, d29
@@ -967,13 +967,13 @@ pixman_asm_function fname
.macro bilinear_over_8888_8888_process_pixblock_tail_head
vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
+ add TMP1, TOP, TMP1, lsl #2
vmlsl.u16 q2, d20, d30
- mov TMP2, X, asr #16
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vmlal.u16 q2, d21, d30
vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS
vld1.32 {d20}, [TMP1], STRIDE
@@ -993,12 +993,12 @@ pixman_asm_function fname
vmovn.u16 d6, q0
vld1.32 {d23}, [TMP2]
vmull.u8 q9, d22, d28
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
- mov TMP4, X, asr #16
+ add TMP3, TOP, TMP3, lsl #2
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmlal.u8 q9, d23, d29
vmovn.u16 d7, q2
vld1.32 {d22}, [TMP3], STRIDE
@@ -1052,21 +1052,21 @@ pixman_asm_function fname
.endm
.macro bilinear_over_8888_8_8888_process_pixblock_head
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
+ add TMP1, TOP, TMP1, lsl #2
vld1.32 {d0}, [TMP1], STRIDE
- mov TMP2, X, asr #16
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vld1.32 {d1}, [TMP1]
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
+ add TMP3, TOP, TMP3, lsl #2
vld1.32 {d2}, [TMP2], STRIDE
- mov TMP4, X, asr #16
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vld1.32 {d3}, [TMP2]
vmull.u8 q2, d0, d28
vmull.u8 q3, d2, d28
@@ -1138,26 +1138,26 @@ pixman_asm_function fname
.macro bilinear_over_8888_8_8888_process_pixblock_tail_head
vshll.u16 q9, d6, #BILINEAR_INTERPOLATION_BITS
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
+ add TMP1, TOP, TMP1, lsl #2
vshll.u16 q10, d2, #BILINEAR_INTERPOLATION_BITS
vld1.32 {d0}, [TMP1], STRIDE
- mov TMP2, X, asr #16
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vmlsl.u16 q9, d6, d30
vmlsl.u16 q10, d2, d31
vld1.32 {d1}, [TMP1]
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
+ add TMP3, TOP, TMP3, lsl #2
vmlal.u16 q9, d7, d30
vmlal.u16 q10, d3, d31
vld1.32 {d2}, [TMP2], STRIDE
- mov TMP4, X, asr #16
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
vadd.u16 q12, q12, q13
vld1.32 {d3}, [TMP2]
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 7e949a3..059b285 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2838,17 +2838,17 @@ generate_composite_function_nearest_scanline \
*/
.macro bilinear_load_8888 reg1, reg2, tmp
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
+ add TMP1, TOP, TMP1, lsl #2
vld1.32 {reg1}, [TMP1], STRIDE
vld1.32 {reg2}, [TMP1]
.endm
.macro bilinear_load_0565 reg1, reg2, tmp
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
+ add TMP1, TOP, TMP1, lsl #1
vld1.32 {reg2[0]}, [TMP1], STRIDE
vld1.32 {reg2[1]}, [TMP1]
convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
@@ -2878,12 +2878,12 @@ generate_composite_function_nearest_scanline \
.macro bilinear_load_and_vertical_interpolate_two_0565 \
acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #1
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #1
+ add TMP2, TOP, TMP2, lsl #1
vld1.32 {acc2lo[0]}, [TMP1], STRIDE
vld1.32 {acc2hi[0]}, [TMP2], STRIDE
vld1.32 {acc2lo[1]}, [TMP1]
@@ -2903,23 +2903,23 @@ generate_composite_function_nearest_scanline \
xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #1
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #1
+ add TMP2, TOP, TMP2, lsl #1
vld1.32 {xacc2lo[0]}, [TMP1], STRIDE
vld1.32 {xacc2hi[0]}, [TMP2], STRIDE
vld1.32 {xacc2lo[1]}, [TMP1]
vld1.32 {xacc2hi[1]}, [TMP2]
convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #1
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #1
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #1
+ add TMP2, TOP, TMP2, lsl #1
vld1.32 {yacc2lo[0]}, [TMP1], STRIDE
vzip.u8 xreg1, xreg3
vld1.32 {yacc2hi[0]}, [TMP2], STRIDE
@@ -3176,7 +3176,7 @@ pixman_asm_function fname
0:
subs WIDTH, WIDTH, #8
blt 1f
- mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+ asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
subs WIDTH, WIDTH, #8
blt 5f
@@ -3195,7 +3195,7 @@ pixman_asm_function fname
/*********** 4 pixels per iteration *****************/
subs WIDTH, WIDTH, #4
blt 1f
- mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+ asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
subs WIDTH, WIDTH, #4
blt 5f
@@ -3245,26 +3245,26 @@ pixman_asm_function fname
.set have_bilinear_interpolate_four_pixels_8888_8888, 1
.macro bilinear_interpolate_four_pixels_8888_8888_head
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #2
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vld1.32 {d22}, [TMP1], STRIDE
vld1.32 {d23}, [TMP1]
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
+ add TMP3, TOP, TMP3, lsl #2
vmull.u8 q8, d22, d28
vmlal.u8 q8, d23, d29
vld1.32 {d22}, [TMP2], STRIDE
vld1.32 {d23}, [TMP2]
- mov TMP4, X, asr #16
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmull.u8 q9, d22, d28
vmlal.u8 q9, d23, d29
@@ -3310,12 +3310,12 @@ pixman_asm_function fname
.endm
.macro bilinear_interpolate_four_pixels_8888_8888_tail_head
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #2
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vmlal.u16 q1, d19, d31
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
@@ -3336,12 +3336,12 @@ pixman_asm_function fname
vadd.u16 q12, q12, q13
vld1.32 {d23}, [TMP2]
vmull.u8 q9, d22, d28
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
- mov TMP4, X, asr #16
+ add TMP3, TOP, TMP3, lsl #2
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmlal.u8 q9, d23, d29
vld1.32 {d22}, [TMP3], STRIDE
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
@@ -3370,12 +3370,12 @@ pixman_asm_function fname
.set have_bilinear_interpolate_eight_pixels_8888_0565, 1
.macro bilinear_interpolate_eight_pixels_8888_0565_head
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #2
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vld1.32 {d20}, [TMP1], STRIDE
vld1.32 {d21}, [TMP1]
vmull.u8 q8, d20, d28
@@ -3383,12 +3383,12 @@ pixman_asm_function fname
vld1.32 {d22}, [TMP2], STRIDE
vld1.32 {d23}, [TMP2]
vmull.u8 q9, d22, d28
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
- mov TMP4, X, asr #16
+ add TMP3, TOP, TMP3, lsl #2
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmlal.u8 q9, d23, d29
vld1.32 {d22}, [TMP3], STRIDE
vld1.32 {d23}, [TMP3]
@@ -3406,12 +3406,12 @@ pixman_asm_function fname
vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
vmlsl.u16 q1, d18, d31
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #2
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vmlal.u16 q1, d19, d31
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS
@@ -3432,12 +3432,12 @@ pixman_asm_function fname
vadd.u16 q12, q12, q13
vld1.32 {d23}, [TMP2]
vmull.u8 q9, d22, d28
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
- mov TMP4, X, asr #16
+ add TMP3, TOP, TMP3, lsl #2
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmlal.u8 q9, d23, d29
vld1.32 {d22}, [TMP3], STRIDE
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
@@ -3492,12 +3492,12 @@ pixman_asm_function fname
.endm
.macro bilinear_interpolate_eight_pixels_8888_0565_tail_head
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #2
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vmlal.u16 q1, d19, d31
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
vuzp.u8 d8, d9
@@ -3519,12 +3519,12 @@ pixman_asm_function fname
vadd.u16 q12, q12, q13
vld1.32 {d23}, [TMP2]
vmull.u8 q9, d22, d28
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
- mov TMP4, X, asr #16
+ add TMP3, TOP, TMP3, lsl #2
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmlal.u8 q9, d23, d29
vld1.32 {d22}, [TMP3], STRIDE
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
@@ -3547,12 +3547,12 @@ pixman_asm_function fname
vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS
vmlsl.u16 q1, d18, d31
- mov TMP1, X, asr #16
+ asr TMP1, X, #16
add X, X, UX
- add TMP1, TOP, TMP1, asl #2
- mov TMP2, X, asr #16
+ add TMP1, TOP, TMP1, lsl #2
+ asr TMP2, X, #16
add X, X, UX
- add TMP2, TOP, TMP2, asl #2
+ add TMP2, TOP, TMP2, lsl #2
vmlal.u16 q1, d19, d31
vuzp.u8 d9, d11
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
@@ -3580,12 +3580,12 @@ pixman_asm_function fname
vadd.u16 q12, q12, q13
vld1.32 {d23}, [TMP2]
vmull.u8 q9, d22, d28
- mov TMP3, X, asr #16
+ asr TMP3, X, #16
add X, X, UX
- add TMP3, TOP, TMP3, asl #2
- mov TMP4, X, asr #16
+ add TMP3, TOP, TMP3, lsl #2
+ asr TMP4, X, #16
add X, X, UX
- add TMP4, TOP, TMP4, asl #2
+ add TMP4, TOP, TMP4, lsl #2
vmlal.u8 q9, d23, d29
vld1.32 {d22}, [TMP3], STRIDE
vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index bdcf6a9..6a5a5fe 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -211,41 +211,41 @@
*/
.macro pixld1_s elem_size, reg1, mem_operand
.if elem_size == 16
- mov TMP1, VX, asr #16
+ asr TMP1, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP1, mem_operand, TMP1, asl #1
- mov TMP2, VX, asr #16
+ add TMP1, mem_operand, TMP1, lsl #1
+ asr TMP2, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP2, mem_operand, TMP2, asl #1
+ add TMP2, mem_operand, TMP2, lsl #1
vld1.16 {d&reg1&[0]}, [TMP1, :16]
- mov TMP1, VX, asr #16
+ asr TMP1, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP1, mem_operand, TMP1, asl #1
+ add TMP1, mem_operand, TMP1, lsl #1
vld1.16 {d&reg1&[1]}, [TMP2, :16]
- mov TMP2, VX, asr #16
+ asr TMP2, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP2, mem_operand, TMP2, asl #1
+ add TMP2, mem_operand, TMP2, lsl #1
vld1.16 {d&reg1&[2]}, [TMP1, :16]
vld1.16 {d&reg1&[3]}, [TMP2, :16]
.elseif elem_size == 32
- mov TMP1, VX, asr #16
+ asr TMP1, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP1, mem_operand, TMP1, asl #2
- mov TMP2, VX, asr #16
+ add TMP1, mem_operand, TMP1, lsl #2
+ asr TMP2, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP2, mem_operand, TMP2, asl #2
+ add TMP2, mem_operand, TMP2, lsl #2
vld1.32 {d&reg1&[0]}, [TMP1, :32]
vld1.32 {d&reg1&[1]}, [TMP2, :32]
.else
@@ -255,20 +255,20 @@
.macro pixld2_s elem_size, reg1, reg2, mem_operand
.if 0 /* elem_size == 32 */
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X, asl #1
- add TMP1, mem_operand, TMP1, asl #2
- mov TMP2, VX, asr #16
+ asr TMP1, VX, #16
+ add VX, VX, UNIT_X, lsl #1
+ add TMP1, mem_operand, TMP1, lsl #2
+ asr TMP2, VX, #16
sub VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #2
+ add TMP2, mem_operand, TMP2, lsl #2
vld1.32 {d&reg1&[0]}, [TMP1, :32]
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X, asl #1
- add TMP1, mem_operand, TMP1, asl #2
+ asr TMP1, VX, #16
+ add VX, VX, UNIT_X, lsl #1
+ add TMP1, mem_operand, TMP1, lsl #2
vld1.32 {d&reg2&[0]}, [TMP2, :32]
- mov TMP2, VX, asr #16
+ asr TMP2, VX, #16
add VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #2
+ add TMP2, mem_operand, TMP2, lsl #2
vld1.32 {d&reg1&[1]}, [TMP1, :32]
vld1.32 {d&reg2&[1]}, [TMP2, :32]
.else
@@ -279,18 +279,18 @@
.macro pixld0_s elem_size, reg1, idx, mem_operand
.if elem_size == 16
- mov TMP1, VX, asr #16
+ asr TMP1, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP1, mem_operand, TMP1, asl #1
+ add TMP1, mem_operand, TMP1, lsl #1
vld1.16 {d&reg1&[idx]}, [TMP1, :16]
.elseif elem_size == 32
- mov TMP1, VX, asr #16
+ asr TMP1, VX, #16
adds VX, VX, UNIT_X
5: subpls VX, VX, SRC_WIDTH_FIXED
bpl 5b
- add TMP1, mem_operand, TMP1, asl #2
+ add TMP1, mem_operand, TMP1, lsl #2
vld1.32 {d&reg1&[idx]}, [TMP1, :32]
.endif
.endm
--
2.4.10