[Pixman] [PATCH 1/2] ARMv6: Fixed a couple of preload bugs.
Ben Avison
bavison at riscosopen.org
Mon Jan 14 11:16:40 PST 2013
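The first is that the pixel count wasn't being shifted correctly for 8bpp or
16bpp images in the narrow case: it was always shifted left by 2 (as for a
32bpp image) instead of by the image's own bpp shift. The fix is illustrated
by src_8_8: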
       Before          After
      Mean  StdDev    Mean  StdDev  Change  Confidence
L1   592.6    40.4   615.1    75.6    3.8%  56% (insignificant)
L2   235.0     5.5   230.5    19.8   -1.9%  48% (insignificant)
M    229.2     2.2   229.0     1.5   -0.1%  20% (insignificant)
HT    60.0     0.4    62.4     0.6    4.0%  100.0%
VT    52.9     0.5    53.4     0.5    0.9%  94.5% (insignificant)
R     45.2     0.4    47.7     0.7    5.6%  100.0%
RT    12.0     0.4    12.1     1.6    0.8%  14% (insignificant)
The second one meant that only the source pointer was being used for preloads
for mid-width rectangles (typically between 32 and 160 bytes wide), instead of
the base pointer of whichever channel was being preloaded. A routine that
illustrates this is over_8888_8888, where the destination buffer is supposed
to be preloaded:
       Before          After
      Mean  StdDev    Mean  StdDev  Change  Confidence
L1    37.6     0.4    37.9     0.3    1.0%  99.5%
L2    30.8     0.5    30.8     0.5    0.1%  22% (insignificant)
M     25.8     0.0    25.8     0.0    0.0%  21% (insignificant)
HT    14.4     0.1    15.5     0.1    8.0%  100.0%
VT    13.8     0.1    14.6     0.1    6.2%  100.0%
R     14.3     0.1    15.7     0.1   10.3%  100.0%
RT     6.7     0.4     7.6     0.4   12.5%  100.0%
This bug also explains why medium-width rectangle prefetch was a regression
for over_n_8_8888. Now it can be re-enabled, and results are:
       Before          After
      Mean  StdDev    Mean  StdDev  Change  Confidence
L1    22.8     0.2    22.8     0.2   -0.2%  41% (insignificant)
L2    21.8     0.1    21.8     0.1    0.1%  37% (insignificant)
M     22.2     0.0    22.2     0.1   -0.1%  56% (insignificant)
HT    12.3     0.1    14.1     0.1   14.4%  100.0%
VT    11.7     0.1    13.3     0.6   13.8%  100.0%
R     10.9     0.1    12.8     0.5   17.1%  100.0%
RT     5.9     0.1     6.5     0.1   11.3%  100.0%
---
pixman/pixman-arm-simd-asm.S | 2 +-
pixman/pixman-arm-simd-asm.h | 22 ++++++----------------
2 files changed, 7 insertions(+), 17 deletions(-)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 8700da9..f043826 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -576,7 +576,7 @@ generate_composite_function \
generate_composite_function \
pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \
- FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_ONLY_PRELOAD_WIDE \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
2, /* prefetch distance */ \
over_n_8_8888_init, \
over_n_8_8888_newline, \
diff --git a/pixman/pixman-arm-simd-asm.h b/pixman/pixman-arm-simd-asm.h
index c1db3fc..ee70131 100644
--- a/pixman/pixman-arm-simd-asm.h
+++ b/pixman/pixman-arm-simd-asm.h
@@ -232,7 +232,7 @@
/* In these cases, each line for each channel is in either 1 or 2 cache lines */
PF bic, WK0, base, #31
PF pld, [WK0]
- PF add, WK1, base, X, LSL #2
+ PF add, WK1, base, X, LSL #bpp_shift
PF sub, WK1, WK1, #1
PF bic, WK1, WK1, #31
PF cmp, WK1, WK0
@@ -240,9 +240,9 @@
PF pld, [WK1]
90:
.else
- PF bic, WK0, SRC, #31
+ PF bic, WK0, base, #31
PF pld, [WK0]
- PF add, WK1, SRC, X, lsl #bpp_shift
+ PF add, WK1, base, X, lsl #bpp_shift
PF sub, WK1, WK1, #1
PF bic, WK1, WK1, #31
PF cmp, WK1, WK0
@@ -399,18 +399,8 @@
preload_trailing mask_bpp, mask_bpp_shift, MASK
preload_trailing dst_r_bpp, dst_bpp_shift, DST
add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
-113:
- process_head , 16, 0, unaligned_src, unaligned_mask, 0
- process_tail , 16, 0
- .if !((flags) & FLAG_PROCESS_DOES_STORE)
- pixst , 16, 0, DST
- .endif
- subs X, X, #128/dst_w_bpp
- bhs 113b
- /* Trailing pixels */
- tst X, #128/dst_w_bpp - 1
- beq exit_label
- trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask
+ /* The remainder of the line is handled identically to the medium case */
+ medium_case_inner_loop_and_trailing_pixels process_head, process_tail, exit_label, unaligned_src, unaligned_mask
.endm
.macro medium_case_inner_loop_and_trailing_pixels process_head, process_tail, exit_label, unaligned_src, unaligned_mask
@@ -723,7 +713,7 @@ fname:
sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */
tst DST, #15
beq 164f
- rsb WK0, DST, #0 /* bits 0-4 = number of leading bytes until destination aligned */
+ rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
leading_15bytes process_head, process_tail
--
1.7.5.4
More information about the Pixman mailing list