[Pixman] [PATCH 3/5] ARM: NEON: Replace old bilinear scanline generator with new template

Taekyun Kim podain77 at gmail.com
Tue Oct 11 06:24:45 PDT 2011


From: Taekyun Kim <tkq.kim at samsung.com>

The bilinear scanline functions in pixman-arm-neon-asm-bilinear.S can
be replaced with the new template simply by wrapping the existing macros.
---
 pixman/pixman-arm-neon-asm-bilinear.S |  484 ++++++++++++++++++++-------------
 1 files changed, 292 insertions(+), 192 deletions(-)

diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S
index 784e5df..25bcb24 100644
--- a/pixman/pixman-arm-neon-asm-bilinear.S
+++ b/pixman/pixman-arm-neon-asm-bilinear.S
@@ -582,198 +582,6 @@ fname:
     bilinear_store_&dst_fmt 4, q2, q3
 .endm
 
-.macro generate_bilinear_scanline_func_src_dst \
-                fname, src_fmt, dst_fmt, op, \
-                bpp_shift, prefetch_distance
-
-pixman_asm_function fname
-    OUT       .req      r0
-    TOP       .req      r1
-    BOTTOM    .req      r2
-    WT        .req      r3
-    WB        .req      r4
-    X         .req      r5
-    UX        .req      r6
-    WIDTH     .req      ip
-    TMP1      .req      r3
-    TMP2      .req      r4
-    PF_OFFS   .req      r7
-    TMP3      .req      r8
-    TMP4      .req      r9
-    STRIDE    .req      r2
-
-    mov       ip, sp
-    push      {r4, r5, r6, r7, r8, r9}
-    mov       PF_OFFS, #prefetch_distance
-    ldmia     ip, {WB, X, UX, WIDTH}
-    mul       PF_OFFS, PF_OFFS, UX
-
-    .set prefetch_offset, prefetch_distance
-
-    sub       STRIDE, BOTTOM, TOP
-    .unreq    BOTTOM
-
-    cmp       WIDTH, #0
-    ble       3f
-
-    vdup.u16  q12, X
-    vdup.u16  q13, UX
-    vdup.u8   d28, WT
-    vdup.u8   d29, WB
-    vadd.u16  d25, d25, d26
-    vadd.u16  q13, q13, q13
-    vshr.u16  q15, q12, #8
-    vadd.u16  q12, q12, q13
-
-    subs      WIDTH, WIDTH, #4
-    blt       1f
-    mov       PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
-0:
-    bilinear_interpolate_four_pixels src_fmt, x, dst_fmt, op
-    subs      WIDTH, WIDTH, #4
-    bge       0b
-1:
-    tst       WIDTH, #2
-    beq       2f
-    bilinear_interpolate_two_pixels src_fmt, x, dst_fmt, op
-2:
-    tst       WIDTH, #1
-    beq       3f
-    bilinear_interpolate_last_pixel src_fmt, x, dst_fmt, op
-3:
-    pop       {r4, r5, r6, r7, r8, r9}
-    bx        lr
-
-    .unreq    OUT
-    .unreq    TOP
-    .unreq    WT
-    .unreq    WB
-    .unreq    X
-    .unreq    UX
-    .unreq    WIDTH
-    .unreq    TMP1
-    .unreq    TMP2
-    .unreq    PF_OFFS
-    .unreq    TMP3
-    .unreq    TMP4
-    .unreq    STRIDE
-.endfunc
-
-.endm
-
-.macro generate_bilinear_scanline_func_src_a8_dst \
-                fname, src_fmt, dst_fmt, op, \
-                bpp_shift, prefetch_distance
-
-pixman_asm_function fname
-    OUT       .req      r0
-    MASK      .req      r1
-    TOP       .req      r2
-    BOTTOM    .req      r3
-    WT        .req      r4
-    WB        .req      r5
-    X         .req      r6
-    UX        .req      r7
-    WIDTH     .req      ip
-    TMP1      .req      r4
-    TMP2      .req      r5
-    PF_OFFS   .req      r8
-    TMP3      .req      r9
-    TMP4      .req      r10
-    STRIDE    .req      r3
-
-    mov       ip, sp
-    push      {r4, r5, r6, r7, r8, r9, r10, ip}
-    mov       PF_OFFS, #prefetch_distance
-    ldmia     ip, {WT, WB, X, UX, WIDTH}
-    mul       PF_OFFS, PF_OFFS, UX
-
-    .set prefetch_offset, prefetch_distance
-
-    sub       STRIDE, BOTTOM, TOP
-    .unreq    BOTTOM
-
-    cmp       WIDTH, #0
-    ble       3f
-
-    vdup.u16  q12, X
-    vdup.u16  q13, UX
-    vdup.u8   d28, WT
-    vdup.u8   d29, WB
-    vadd.u16  d25, d25, d26
-    vadd.u16  q13, q13, q13
-    vshr.u16  q15, q12, #8
-    vadd.u16  q12, q12, q13
-
-    subs      WIDTH, WIDTH, #4
-    blt       1f
-    mov       PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
-0:
-    bilinear_interpolate_four_pixels src_fmt, 8, dst_fmt, op
-    subs      WIDTH, WIDTH, #4
-    bge       0b
-1:
-    tst       WIDTH, #2
-    beq       2f
-    bilinear_interpolate_two_pixels src_fmt, 8, dst_fmt, op
-2:
-    tst       WIDTH, #1
-    beq       3f
-    bilinear_interpolate_last_pixel src_fmt, 8, dst_fmt, op
-3:
-    pop       {r4, r5, r6, r7, r8, r9, r10, ip}
-    bx        lr
-
-    .unreq    OUT
-    .unreq    TOP
-    .unreq    WT
-    .unreq    WB
-    .unreq    X
-    .unreq    UX
-    .unreq    WIDTH
-    .unreq    MASK
-    .unreq    TMP1
-    .unreq    TMP2
-    .unreq    PF_OFFS
-    .unreq    TMP3
-    .unreq    TMP4
-    .unreq    STRIDE
-.endfunc
-
-.endm
-
-generate_bilinear_scanline_func_src_dst \
-    pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \
-    8888, 8888, over, 2, 28
-
-generate_bilinear_scanline_func_src_dst \
-    pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \
-    8888, 8888, add, 2, 28
-
-generate_bilinear_scanline_func_src_a8_dst \
-    pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \
-    8888, 8888, src, 2, 28
-
-generate_bilinear_scanline_func_src_a8_dst \
-    pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \
-    8888, 0565, src, 2, 28
-
-generate_bilinear_scanline_func_src_a8_dst \
-    pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \
-    0565, 8888, src, 1, 28
-
-generate_bilinear_scanline_func_src_a8_dst \
-    pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \
-    0565, 0565, src, 1, 28
-
-generate_bilinear_scanline_func_src_a8_dst \
-    pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \
-    8888, 8888, over, 2, 28
-
-generate_bilinear_scanline_func_src_a8_dst \
-    pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
-    8888, 8888, add, 2, 28
-
 .set BILINEAR_FLAG_USE_MASK,		1
 .set BILINEAR_FLAG_USE_ALL_NEON_REGS,	2
 
@@ -855,6 +663,8 @@ pixman_asm_function fname
     TMP4      .req      r10
     STRIDE    .req      r3
 
+    .set prefetch_offset, prefetch_distance
+
     mov       ip, sp
     push      {r4, r5, r6, r7, r8, r9, r10, ip}
     mov       PF_OFFS, #prefetch_distance
@@ -968,3 +778,293 @@ pixman_asm_function fname
 .endfunc
 
 .endm
+
+/* src_8888_8_8888: SRC op, 8888 source, 8-bit mask, 8888 destination; tail is empty, so head does all the work for four pixels */
+.macro bilinear_src_8888_8_8888_process_last_pixel
+    bilinear_interpolate_last_pixel 8888, 8, 8888, src
+.endm
+
+.macro bilinear_src_8888_8_8888_process_two_pixels
+    bilinear_interpolate_two_pixels 8888, 8, 8888, src
+.endm
+
+.macro bilinear_src_8888_8_8888_process_four_pixels
+    bilinear_interpolate_four_pixels 8888, 8, 8888, src
+.endm
+
+.macro bilinear_src_8888_8_8888_process_pixblock_head
+    bilinear_src_8888_8_8888_process_four_pixels
+.endm
+
+.macro bilinear_src_8888_8_8888_process_pixblock_tail
+.endm
+
+.macro bilinear_src_8888_8_8888_process_pixblock_tail_head
+    bilinear_src_8888_8_8888_process_pixblock_tail
+    bilinear_src_8888_8_8888_process_pixblock_head
+.endm
+
+/* src_8888_8_0565: SRC op, 8888 source, 8-bit mask, 0565 destination; tail is empty, so head does all the work for four pixels */
+.macro bilinear_src_8888_8_0565_process_last_pixel
+    bilinear_interpolate_last_pixel 8888, 8, 0565, src
+.endm
+
+.macro bilinear_src_8888_8_0565_process_two_pixels
+    bilinear_interpolate_two_pixels 8888, 8, 0565, src
+.endm
+
+.macro bilinear_src_8888_8_0565_process_four_pixels
+    bilinear_interpolate_four_pixels 8888, 8, 0565, src
+.endm
+
+.macro bilinear_src_8888_8_0565_process_pixblock_head
+    bilinear_src_8888_8_0565_process_four_pixels
+.endm
+
+.macro bilinear_src_8888_8_0565_process_pixblock_tail
+.endm
+
+.macro bilinear_src_8888_8_0565_process_pixblock_tail_head
+    bilinear_src_8888_8_0565_process_pixblock_tail
+    bilinear_src_8888_8_0565_process_pixblock_head
+.endm
+
+/* src_0565_8_x888: SRC op, 0565 source, 8-bit mask; the x888 destination is passed to the interpolate macros as format 8888 */
+.macro bilinear_src_0565_8_x888_process_last_pixel
+    bilinear_interpolate_last_pixel 0565, 8, 8888, src
+.endm
+
+.macro bilinear_src_0565_8_x888_process_two_pixels
+    bilinear_interpolate_two_pixels 0565, 8, 8888, src
+.endm
+
+.macro bilinear_src_0565_8_x888_process_four_pixels
+    bilinear_interpolate_four_pixels 0565, 8, 8888, src
+.endm
+
+.macro bilinear_src_0565_8_x888_process_pixblock_head
+    bilinear_src_0565_8_x888_process_four_pixels
+.endm
+
+.macro bilinear_src_0565_8_x888_process_pixblock_tail
+.endm
+
+.macro bilinear_src_0565_8_x888_process_pixblock_tail_head
+    bilinear_src_0565_8_x888_process_pixblock_tail
+    bilinear_src_0565_8_x888_process_pixblock_head
+.endm
+
+/* src_0565_8_0565: SRC op, 0565 source, 8-bit mask, 0565 destination; tail is empty, so head does all the work for four pixels */
+.macro bilinear_src_0565_8_0565_process_last_pixel
+    bilinear_interpolate_last_pixel 0565, 8, 0565, src
+.endm
+
+.macro bilinear_src_0565_8_0565_process_two_pixels
+    bilinear_interpolate_two_pixels 0565, 8, 0565, src
+.endm
+
+.macro bilinear_src_0565_8_0565_process_four_pixels
+    bilinear_interpolate_four_pixels 0565, 8, 0565, src
+.endm
+
+.macro bilinear_src_0565_8_0565_process_pixblock_head
+    bilinear_src_0565_8_0565_process_four_pixels
+.endm
+
+.macro bilinear_src_0565_8_0565_process_pixblock_tail
+.endm
+
+.macro bilinear_src_0565_8_0565_process_pixblock_tail_head
+    bilinear_src_0565_8_0565_process_pixblock_tail
+    bilinear_src_0565_8_0565_process_pixblock_head
+.endm
+
+/* over_8888_8888: OVER op, 8888 source, no mask (mask argument is x), 8888 destination; tail is empty, head processes four pixels */
+.macro bilinear_over_8888_8888_process_last_pixel
+    bilinear_interpolate_last_pixel 8888, x, 8888, over
+.endm
+
+.macro bilinear_over_8888_8888_process_two_pixels
+    bilinear_interpolate_two_pixels 8888, x, 8888, over
+.endm
+
+.macro bilinear_over_8888_8888_process_four_pixels
+    bilinear_interpolate_four_pixels 8888, x, 8888, over
+.endm
+
+.macro bilinear_over_8888_8888_process_pixblock_head
+    bilinear_over_8888_8888_process_four_pixels
+.endm
+
+.macro bilinear_over_8888_8888_process_pixblock_tail
+.endm
+
+.macro bilinear_over_8888_8888_process_pixblock_tail_head
+    bilinear_over_8888_8888_process_pixblock_tail
+    bilinear_over_8888_8888_process_pixblock_head
+.endm
+
+/* over_8888_8_8888: OVER op, 8888 source, 8-bit mask, 8888 destination; tail is empty, so head does all the work for four pixels */
+.macro bilinear_over_8888_8_8888_process_last_pixel
+    bilinear_interpolate_last_pixel 8888, 8, 8888, over
+.endm
+
+.macro bilinear_over_8888_8_8888_process_two_pixels
+    bilinear_interpolate_two_pixels 8888, 8, 8888, over
+.endm
+
+.macro bilinear_over_8888_8_8888_process_four_pixels
+    bilinear_interpolate_four_pixels 8888, 8, 8888, over
+.endm
+
+.macro bilinear_over_8888_8_8888_process_pixblock_head
+    bilinear_over_8888_8_8888_process_four_pixels
+.endm
+
+.macro bilinear_over_8888_8_8888_process_pixblock_tail
+.endm
+
+.macro bilinear_over_8888_8_8888_process_pixblock_tail_head
+    bilinear_over_8888_8_8888_process_pixblock_tail
+    bilinear_over_8888_8_8888_process_pixblock_head
+.endm
+
+/* add_8888_8888: ADD op, 8888 source, no mask (mask argument is x), 8888 destination; tail is empty, head processes four pixels */
+.macro bilinear_add_8888_8888_process_last_pixel
+    bilinear_interpolate_last_pixel 8888, x, 8888, add
+.endm
+
+.macro bilinear_add_8888_8888_process_two_pixels
+    bilinear_interpolate_two_pixels 8888, x, 8888, add
+.endm
+
+.macro bilinear_add_8888_8888_process_four_pixels
+    bilinear_interpolate_four_pixels 8888, x, 8888, add
+.endm
+
+.macro bilinear_add_8888_8888_process_pixblock_head
+    bilinear_add_8888_8888_process_four_pixels
+.endm
+
+.macro bilinear_add_8888_8888_process_pixblock_tail
+.endm
+
+.macro bilinear_add_8888_8888_process_pixblock_tail_head
+    bilinear_add_8888_8888_process_pixblock_tail
+    bilinear_add_8888_8888_process_pixblock_head
+.endm
+
+/* add_8888_8_8888: ADD op, 8888 source, 8-bit mask, 8888 destination; tail is empty, so head does all the work for four pixels */
+.macro bilinear_add_8888_8_8888_process_last_pixel
+    bilinear_interpolate_last_pixel 8888, 8, 8888, add
+.endm
+
+.macro bilinear_add_8888_8_8888_process_two_pixels
+    bilinear_interpolate_two_pixels 8888, 8, 8888, add
+.endm
+
+.macro bilinear_add_8888_8_8888_process_four_pixels
+    bilinear_interpolate_four_pixels 8888, 8, 8888, add
+.endm
+
+.macro bilinear_add_8888_8_8888_process_pixblock_head
+    bilinear_add_8888_8_8888_process_four_pixels
+.endm
+
+.macro bilinear_add_8888_8_8888_process_pixblock_tail
+.endm
+
+.macro bilinear_add_8888_8_8888_process_pixblock_tail_head
+    bilinear_add_8888_8_8888_process_pixblock_tail
+    bilinear_add_8888_8_8888_process_pixblock_head
+.endm
+
+
+/* Bilinear scanline functions: one generate_bilinear_scanline_func instantiation per function; BILINEAR_FLAG_USE_MASK is passed for the variants with a mask, 0 otherwise */
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \
+    8888, 8888, 2, 2, \
+    bilinear_src_8888_8_8888_process_last_pixel, \
+    bilinear_src_8888_8_8888_process_two_pixels, \
+    bilinear_src_8888_8_8888_process_four_pixels, \
+    bilinear_src_8888_8_8888_process_pixblock_head, \
+    bilinear_src_8888_8_8888_process_pixblock_tail, \
+    bilinear_src_8888_8_8888_process_pixblock_tail_head, \
+    4, 28, BILINEAR_FLAG_USE_MASK
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \
+    8888, 0565, 2, 1, \
+    bilinear_src_8888_8_0565_process_last_pixel, \
+    bilinear_src_8888_8_0565_process_two_pixels, \
+    bilinear_src_8888_8_0565_process_four_pixels, \
+    bilinear_src_8888_8_0565_process_pixblock_head, \
+    bilinear_src_8888_8_0565_process_pixblock_tail, \
+    bilinear_src_8888_8_0565_process_pixblock_tail_head, \
+    4, 28, BILINEAR_FLAG_USE_MASK
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \
+    0565, 8888, 1, 2, \
+    bilinear_src_0565_8_x888_process_last_pixel, \
+    bilinear_src_0565_8_x888_process_two_pixels, \
+    bilinear_src_0565_8_x888_process_four_pixels, \
+    bilinear_src_0565_8_x888_process_pixblock_head, \
+    bilinear_src_0565_8_x888_process_pixblock_tail, \
+    bilinear_src_0565_8_x888_process_pixblock_tail_head, \
+    4, 28, BILINEAR_FLAG_USE_MASK
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \
+    0565, 0565, 1, 1, \
+    bilinear_src_0565_8_0565_process_last_pixel, \
+    bilinear_src_0565_8_0565_process_two_pixels, \
+    bilinear_src_0565_8_0565_process_four_pixels, \
+    bilinear_src_0565_8_0565_process_pixblock_head, \
+    bilinear_src_0565_8_0565_process_pixblock_tail, \
+    bilinear_src_0565_8_0565_process_pixblock_tail_head, \
+    4, 28, BILINEAR_FLAG_USE_MASK
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \
+    8888, 8888, 2, 2, \
+    bilinear_over_8888_8888_process_last_pixel, \
+    bilinear_over_8888_8888_process_two_pixels, \
+    bilinear_over_8888_8888_process_four_pixels, \
+    bilinear_over_8888_8888_process_pixblock_head, \
+    bilinear_over_8888_8888_process_pixblock_tail, \
+    bilinear_over_8888_8888_process_pixblock_tail_head, \
+    4, 28, 0
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \
+    8888, 8888, 2, 2, \
+    bilinear_over_8888_8_8888_process_last_pixel, \
+    bilinear_over_8888_8_8888_process_two_pixels, \
+    bilinear_over_8888_8_8888_process_four_pixels, \
+    bilinear_over_8888_8_8888_process_pixblock_head, \
+    bilinear_over_8888_8_8888_process_pixblock_tail, \
+    bilinear_over_8888_8_8888_process_pixblock_tail_head, \
+    4, 28, BILINEAR_FLAG_USE_MASK
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \
+    8888, 8888, 2, 2, \
+    bilinear_add_8888_8888_process_last_pixel, \
+    bilinear_add_8888_8888_process_two_pixels, \
+    bilinear_add_8888_8888_process_four_pixels, \
+    bilinear_add_8888_8888_process_pixblock_head, \
+    bilinear_add_8888_8888_process_pixblock_tail, \
+    bilinear_add_8888_8888_process_pixblock_tail_head, \
+    4, 28, 0
+
+generate_bilinear_scanline_func \
+    pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
+    8888, 8888, 2, 2, \
+    bilinear_add_8888_8_8888_process_last_pixel, \
+    bilinear_add_8888_8_8888_process_two_pixels, \
+    bilinear_add_8888_8_8888_process_four_pixels, \
+    bilinear_add_8888_8_8888_process_pixblock_head, \
+    bilinear_add_8888_8_8888_process_pixblock_tail, \
+    bilinear_add_8888_8_8888_process_pixblock_tail_head, \
+    4, 28, BILINEAR_FLAG_USE_MASK
-- 
1.7.1



More information about the Pixman mailing list