[pulseaudio-discuss] [PATCH 1/3] WIP sconv: fix ARM NEON s16_to_float conversion
Peter Meerwald
pmeerw at pmeerw.net
Sat Oct 27 05:59:41 PDT 2012
From: Peter Meerwald <p.meerwald at bct-electronic.com>
The C implementation of s16_to_float performs:
    flt = sample / (float) 0x7fff
Floating-point division is expensive, and the obvious solution is to
multiply by the inverse instead:
    flt = sample * (1.0f / 0x7fff)
However, the results of the multiplication differ slightly from those of
the division for 1536 input values.
This patch checks for input values that would produce a mismatch and
corrects the output accordingly.
Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>
---
src/pulsecore/sconv_neon.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/src/pulsecore/sconv_neon.c b/src/pulsecore/sconv_neon.c
index fd45965..40312b0 100644
--- a/src/pulsecore/sconv_neon.c
+++ b/src/pulsecore/sconv_neon.c
@@ -75,16 +75,23 @@ static void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *src, float *
const float invscale = 1.0f / 0x7FFF;
__asm__ __volatile__ (
- "movs %[n], %[n], lsr #2 \n\t"
+ "movs %[n], %[n], lsr #2 \n\t"
"beq 2f \n\t"
"vdup.f32 q1, %[invscale] \n\t"
+ "vdup.u16 q3, %[mask] \n\t"
+ "vdup.u32 q4, %[one] \n\t"
"1: \n\t"
- "vld1.16 {d0}, [%[src]]! \n\t"
- "vmovl.s16 q0, d0 \n\t"
- "vcvt.f32.s32 q0, q0 \n\t"
- "vmul.f32 q0, q0, q1 \n\t"
+ "vld1.16 {d0}, [%[src]]! \n\t" /* load x */
+ "vmovl.s16 q0, d0 \n\t" /* s16 -> s32 */
+ "vcvt.f32.s32 q0, q0 \n\t" /* s32 -> float */
+
+ "vceq.u16 q2, q0, q3 \n\t" /* check for defect */
+ "vand.u32 q2, q2, q4 \n\t" /* prepare 1 if defect */
+
+ "vmul.f32 q0, q0, q1 \n\t" /* multiply by invscale */
+ "vadd.u32 q0, q0, q2 \n\t" /* correct if defect */
"subs %[n], %[n], #1 \n\t"
"vst1.32 {q0}, [%[dst]]! \n\t"
"bgt 1b \n\t"
@@ -92,13 +99,13 @@ static void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *src, float *
"2: \n\t"
: [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) /* output operands (or input operands that get modified) */
- : [invscale] "r" (invscale) /* input operands */
- : "memory", "cc", "q0", "q1" /* clobber list */
+ : [invscale] "r" (invscale), [mask] "r" (0x4000), [one] "r" (1) /* input operands */
+ : "memory", "cc", "q0", "q1", "q2", "q3", "q4" /* clobber list */
);
/* leftovers */
while (i--) {
- *dst++ = *src++ * invscale;
+ *dst++ = *src++ / (float) 0x7fff;
}
}
--
1.7.9.5
More information about the pulseaudio-discuss mailing list