[Pixman] [PATCH 1/2] ARMv6: Fixed a couple of preload bugs.

Ben Avison bavison at riscosopen.org
Mon Jan 14 11:16:40 PST 2013


The first is that the pixel count wasn't being shifted correctly for 8bpp or
16bpp images in the narrow case: it was always shifted left by 2, as if for a
32bpp image. The effect of the fix is illustrated by src_8_8:

        Before         After
      Mean  StdDev   Mean StdDev  Change  Confidence
L1    592.6   40.4  615.1   75.6    3.8%  56% (insignificant)
L2    235.0    5.5  230.5   19.8   -1.9%  48% (insignificant)
M     229.2    2.2  229.0    1.5   -0.1%  20% (insignificant)
HT     60.0    0.4   62.4    0.6    4.0%  100.0%
VT     52.9    0.5   53.4    0.5    0.9%  94.5% (insignificant)
R      45.2    0.4   47.7    0.7    5.6%  100.0%
RT     12.0    0.4   12.1    1.6    0.8%  14% (insignificant)

The second bug meant that only the source pointer was being used for preloads
for mid-width rectangles (typically between 32 and 160 bytes wide). A routine
that illustrates this is over_8888_8888, where the destination buffer is
supposed to be preloaded:

        Before         After
      Mean  StdDev   Mean StdDev  Change  Confidence
L1     37.6    0.4   37.9    0.3    1.0%  99.5%
L2     30.8    0.5   30.8    0.5    0.1%  22% (insignificant)
M      25.8    0.0   25.8    0.0    0.0%  21% (insignificant)
HT     14.4    0.1   15.5    0.1    8.0%  100.0%
VT     13.8    0.1   14.6    0.1    6.2%  100.0%
R      14.3    0.1   15.7    0.1   10.3%  100.0%
RT      6.7    0.4    7.6    0.4   12.5%  100.0%

This bug also explains why medium-width rectangle prefetch was a regression
for over_n_8_8888. Now it can be re-enabled (by dropping
FLAG_ONLY_PRELOAD_WIDE), and the results are:

        Before         After
      Mean  StdDev   Mean StdDev  Change  Confidence
L1     22.8    0.2   22.8    0.2   -0.2%  41% (insignificant)
L2     21.8    0.1   21.8    0.1    0.1%  37% (insignificant)
M      22.2    0.0   22.2    0.1   -0.1%  56% (insignificant)
HT     12.3    0.1   14.1    0.1   14.4%  100.0%
VT     11.7    0.1   13.3    0.6   13.8%  100.0%
R      10.9    0.1   12.8    0.5   17.1%  100.0%
RT      5.9    0.1    6.5    0.1   11.3%  100.0%
---
 pixman/pixman-arm-simd-asm.S |    2 +-
 pixman/pixman-arm-simd-asm.h |   22 ++++++----------------
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 8700da9..f043826 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -576,7 +576,7 @@ generate_composite_function \
 
 generate_composite_function \
     pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \
-    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_ONLY_PRELOAD_WIDE \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
     2, /* prefetch distance */ \
     over_n_8_8888_init, \
     over_n_8_8888_newline, \
diff --git a/pixman/pixman-arm-simd-asm.h b/pixman/pixman-arm-simd-asm.h
index c1db3fc..ee70131 100644
--- a/pixman/pixman-arm-simd-asm.h
+++ b/pixman/pixman-arm-simd-asm.h
@@ -232,7 +232,7 @@
         /* In these cases, each line for each channel is in either 1 or 2 cache lines */
         PF  bic,    WK0, base, #31
         PF  pld,    [WK0]
-        PF  add,    WK1, base, X, LSL #2
+        PF  add,    WK1, base, X, LSL #bpp_shift
         PF  sub,    WK1, WK1, #1
         PF  bic,    WK1, WK1, #31
         PF  cmp,    WK1, WK0
@@ -240,9 +240,9 @@
         PF  pld,    [WK1]
 90:
   .else
-        PF  bic,    WK0, SRC, #31
+        PF  bic,    WK0, base, #31
         PF  pld,    [WK0]
-        PF  add,    WK1, SRC, X, lsl #bpp_shift
+        PF  add,    WK1, base, X, lsl #bpp_shift
         PF  sub,    WK1, WK1, #1
         PF  bic,    WK1, WK1, #31
         PF  cmp,    WK1, WK0
@@ -399,18 +399,8 @@
         preload_trailing  mask_bpp, mask_bpp_shift, MASK
         preload_trailing  dst_r_bpp, dst_bpp_shift, DST
         add     X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
-113:
-        process_head  , 16, 0, unaligned_src, unaligned_mask, 0
-        process_tail  , 16, 0
- .if !((flags) & FLAG_PROCESS_DOES_STORE)
-        pixst   , 16, 0, DST
- .endif
-        subs    X, X, #128/dst_w_bpp
-        bhs     113b
-        /* Trailing pixels */
-        tst     X, #128/dst_w_bpp - 1
-        beq     exit_label
-        trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
+        /* The remainder of the line is handled identically to the medium case */
+        medium_case_inner_loop_and_trailing_pixels  process_head, process_tail, exit_label, unaligned_src, unaligned_mask
 .endm
 
 .macro medium_case_inner_loop_and_trailing_pixels  process_head, process_tail, exit_label, unaligned_src, unaligned_mask
@@ -723,7 +713,7 @@ fname:
         sub     X, X, #128/dst_w_bpp     /* simplifies inner loop termination */
         tst     DST, #15
         beq     164f
-        rsb     WK0, DST, #0 /* bits 0-4 = number of leading bytes until destination aligned */
+        rsb     WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
         
         leading_15bytes  process_head, process_tail
         
-- 
1.7.5.4



More information about the Pixman mailing list