[pulseaudio-commits] [Git][pulseaudio/pulseaudio][master] 3 commits: tests: fix possible segfault in cpu-remap-test

Arun Raghavan gitlab at gitlab.freedesktop.org
Fri Mar 29 06:10:07 UTC 2019



Arun Raghavan pushed to branch master at PulseAudio / pulseaudio


Commits:
ac4a5026 by Sascha Silbe at 2019-03-29T06:04:28Z
tests: fix possible segfault in cpu-remap-test

pa_init_remap_func() takes care to initialise pa_remap_t.do_remap to
NULL before calling init_remap_func (the CPU-specific remap init
function) and invokes init_remap_c if init_remap_func did not set
pa_remap_t.do_remap to non-NULL. remap_init_test_channels() calls
init_remap_func() directly, so it must itself make sure
pa_remap_t.do_remap is set to NULL beforehand. Otherwise
pa_remap_t.do_remap is left holding an uninitialised value whenever
there is no CPU-optimised remap function for the current operation, and
calling through that pointer can segfault.

- - - - -
1e4fb614 by Sascha Silbe at 2019-03-29T06:04:28Z
tests: test NEON 2-channel->4-channel rearrange

We have optimised 2-channel->4-channel rearrange remap functions. Test
them.

- - - - -
034b7782 by Sascha Silbe at 2019-03-29T06:04:28Z
remap: support S32NE work format

So far PulseAudio only supported two different work formats: S16NE if
it's sufficient to represent the input and output formats without loss
of precision and FLOAT32NE in all other cases. For systems that use
S32NE exclusively, this results in unnecessary conversions from S32NE to
FLOAT32NE and back again.

Add S32NE remap operations and make use of them (for the COPY and
TRIVIAL resamplers) if both the input and the output format are S32NE.
This avoids the back-and-forth conversions between S32NE and FLOAT32NE,
significantly improving performance for those cases.

- - - - -


7 changed files:

- src/pulsecore/remap.c
- src/pulsecore/remap.h
- src/pulsecore/remap_mmx.c
- src/pulsecore/remap_neon.c
- src/pulsecore/remap_sse.c
- src/pulsecore/resampler.c
- src/tests/cpu-remap-test.c


Changes:

=====================================
src/pulsecore/remap.c
=====================================
@@ -51,6 +51,24 @@ static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
     }
 }
 
+static void remap_mono_to_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i; i--) {
+        dst[0] = dst[1] = src[0];
+        dst[2] = dst[3] = src[1];
+        dst[4] = dst[5] = src[2];
+        dst[6] = dst[7] = src[3];
+        src += 4;
+        dst += 8;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = dst[1] = src[0];
+        src++;
+        dst += 2;
+    }
+}
+
 static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     unsigned i;
 
@@ -87,6 +105,28 @@ static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
     }
 }
 
+static void remap_stereo_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i > 0; i--) {
+        /* Avoid overflow by performing division first. We accept a
+         * difference of +/- 1 to the ideal result. */
+        dst[0] = (src[0]/2 + src[1]/2);
+        dst[1] = (src[2]/2 + src[3]/2);
+        dst[2] = (src[4]/2 + src[5]/2);
+        dst[3] = (src[6]/2 + src[7]/2);
+        src += 8;
+        dst += 4;
+    }
+    for (i = n & 3; i; i--) {
+        /* Avoid overflow by performing division first. We accept a
+         * difference of +/- 1 to the ideal result. */
+        dst[0] = (src[0]/2 + src[1]/2);
+        src += 2;
+        dst += 1;
+    }
+}
+
 static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     unsigned i;
 
@@ -123,6 +163,24 @@ static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
     }
 }
 
+static void remap_mono_to_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i; i--) {
+        dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+        dst[4] = dst[5] = dst[6] = dst[7] = src[1];
+        dst[8] = dst[9] = dst[10] = dst[11] = src[2];
+        dst[12] = dst[13] = dst[14] = dst[15] = src[3];
+        src += 4;
+        dst += 16;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+        src++;
+        dst += 4;
+    }
+}
+
 static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     unsigned i;
 
@@ -159,6 +217,28 @@ static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
     }
 }
 
+static void remap_ch4_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    unsigned i;
+
+    for (i = n >> 2; i > 0; i--) {
+        /* Avoid overflow by performing division first. We accept a
+         * difference of +/- 3 to the ideal result. */
+        dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4);
+        dst[1] = (src[4]/4 + src[5]/4 + src[6]/4 + src[7]/4);
+        dst[2] = (src[8]/4 + src[9]/4 + src[10]/4 + src[11]/4);
+        dst[3] = (src[12]/4 + src[13]/4 + src[14]/4 + src[15]/4);
+        src += 16;
+        dst += 4;
+    }
+    for (i = n & 3; i; i--) {
+        /* Avoid overflow by performing division first. We accept a
+         * difference of +/- 3 to the ideal result. */
+        dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4);
+        src += 4;
+        dst += 1;
+    }
+}
+
 static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     unsigned i;
 
@@ -208,6 +288,36 @@ static void remap_channels_matrix_s16ne_c(pa_remap_t *m, int16_t *dst, const int
     }
 }
 
+static void remap_channels_matrix_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    unsigned oc, ic, i;
+    unsigned n_ic, n_oc;
+
+    n_ic = m->i_ss.channels;
+    n_oc = m->o_ss.channels;
+
+    memset(dst, 0, n * sizeof(int32_t) * n_oc);
+
+    for (oc = 0; oc < n_oc; oc++) {
+
+        for (ic = 0; ic < n_ic; ic++) {
+            int32_t *d = dst + oc;
+            const int32_t *s = src + ic;
+            int32_t vol = m->map_table_i[oc][ic];
+
+            if (vol <= 0)
+                continue;
+
+            if (vol >= 0x10000) {
+                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                    *d += *s;
+            } else {
+                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                    *d += (int32_t) (((int64_t)*s * vol) >> 16);
+            }
+        }
+    }
+}
+
 static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     unsigned oc, ic, i;
     unsigned n_ic, n_oc;
@@ -309,6 +419,44 @@ static void remap_arrange_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
     }
 }
 
+static void remap_arrange_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    const unsigned n_ic = m->i_ss.channels;
+    const int8_t *arrange = m->state;
+
+    src += arrange[0];
+    for (; n > 0; n--) {
+        *dst++ = *src;
+        src += n_ic;
+    }
+}
+
+static void remap_arrange_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    const unsigned n_ic = m->i_ss.channels;
+    const int8_t *arrange = m->state;
+    const int ic0 = arrange[0], ic1 = arrange[1];
+
+    for (; n > 0; n--) {
+        *dst++ = (ic0 >= 0) ? *(src + ic0) : 0;
+        *dst++ = (ic1 >= 0) ? *(src + ic1) : 0;
+        src += n_ic;
+    }
+}
+
+static void remap_arrange_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    const unsigned n_ic = m->i_ss.channels;
+    const int8_t *arrange = m->state;
+    const int ic0 = arrange[0], ic1 = arrange[1],
+        ic2 = arrange[2], ic3 = arrange[3];
+
+    for (; n > 0; n--) {
+        *dst++ = (ic0 >= 0) ? *(src + ic0) : 0;
+        *dst++ = (ic1 >= 0) ? *(src + ic1) : 0;
+        *dst++ = (ic2 >= 0) ? *(src + ic2) : 0;
+        *dst++ = (ic3 >= 0) ? *(src + ic3) : 0;
+        src += n_ic;
+    }
+}
+
 static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     const unsigned n_ic = m->i_ss.channels;
     const int8_t *arrange = m->state;
@@ -348,16 +496,19 @@ static void remap_arrange_ch4_float32ne_c(pa_remap_t *m, float *dst, const float
 }
 
 void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
-    pa_do_remap_func_t func_float) {
+    pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float) {
 
     pa_assert(m);
 
     if (m->format == PA_SAMPLE_S16NE)
         m->do_remap = func_s16;
+    else if (m->format == PA_SAMPLE_S32NE)
+        m->do_remap = func_s32;
     else if (m->format == PA_SAMPLE_FLOAT32NE)
         m->do_remap = func_float;
     else
         pa_assert_not_reached();
+    pa_assert(m->do_remap);
 }
 
 static bool force_generic_code = false;
@@ -374,6 +525,7 @@ static void init_remap_c(pa_remap_t *m) {
     if (force_generic_code) {
         pa_log_info("Forced to use generic matrix remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
+            (pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
             (pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
         return;
     }
@@ -383,12 +535,14 @@ static void init_remap_c(pa_remap_t *m) {
 
         pa_log_info("Using mono to stereo remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c,
+            (pa_do_remap_func_t) remap_mono_to_stereo_s32ne_c,
             (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c);
     } else if (n_ic == 2 && n_oc == 1 &&
             m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
 
         pa_log_info("Using stereo to mono remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c,
+            (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_c,
             (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c);
     } else if (n_ic == 1 && n_oc == 4 &&
             m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
@@ -396,6 +550,7 @@ static void init_remap_c(pa_remap_t *m) {
 
         pa_log_info("Using mono to 4-channel remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c,
+            (pa_do_remap_func_t) remap_mono_to_ch4_s32ne_c,
             (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c);
     } else if (n_ic == 4 && n_oc == 1 &&
             m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
@@ -403,11 +558,13 @@ static void init_remap_c(pa_remap_t *m) {
 
         pa_log_info("Using 4-channel to mono remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c,
+            (pa_do_remap_func_t) remap_ch4_to_mono_s32ne_c,
             (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c);
     } else if (pa_setup_remap_arrange(m, arrange) && n_oc == 1) {
 
         pa_log_info("Using mono arrange remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c,
+            (pa_do_remap_func_t) remap_arrange_mono_s32ne_c,
             (pa_do_remap_func_t) remap_arrange_mono_float32ne_c);
 
         /* setup state */
@@ -416,6 +573,7 @@ static void init_remap_c(pa_remap_t *m) {
 
         pa_log_info("Using stereo arrange remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c,
+            (pa_do_remap_func_t) remap_arrange_stereo_s32ne_c,
             (pa_do_remap_func_t) remap_arrange_stereo_float32ne_c);
 
         /* setup state */
@@ -424,6 +582,7 @@ static void init_remap_c(pa_remap_t *m) {
 
         pa_log_info("Using 4-channel arrange remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c,
+            (pa_do_remap_func_t) remap_arrange_ch4_s32ne_c,
             (pa_do_remap_func_t) remap_arrange_ch4_float32ne_c);
 
         /* setup state */
@@ -432,6 +591,7 @@ static void init_remap_c(pa_remap_t *m) {
 
         pa_log_info("Using generic matrix remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
+            (pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
             (pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
     }
 }


=====================================
src/pulsecore/remap.h
=====================================
@@ -55,6 +55,6 @@ void pa_set_init_remap_func(pa_init_remap_func_t func);
 bool pa_setup_remap_arrange(const pa_remap_t *m, int8_t arrange[PA_CHANNELS_MAX]);
 
 void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
-    pa_do_remap_func_t func_float);
+    pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float);
 
 #endif /* fooremapfoo */


=====================================
src/pulsecore/remap_mmx.c
=====================================
@@ -111,7 +111,8 @@ static void remap_mono_to_stereo_s16ne_mmx(pa_remap_t *m, int16_t *dst, const in
     );
 }
 
-static void remap_mono_to_stereo_float32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+/* Works for both S32NE and FLOAT32NE */
+static void remap_mono_to_stereo_any32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     pa_reg_x86 temp, temp2;
 
     __asm__ __volatile__ (
@@ -135,7 +136,8 @@ static void init_remap_mmx(pa_remap_t *m) {
 
         pa_log_info("Using MMX mono to stereo remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx,
-            (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_mmx);
+            (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx,
+            (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx);
     }
 }
 #endif /* defined (__i386__) || defined (__amd64__) */


=====================================
src/pulsecore/remap_neon.c
=====================================
@@ -143,6 +143,25 @@ static void remap_stereo_to_mono_float32ne_neon(pa_remap_t *m, float *dst, const
     }
 }
 
+static void remap_stereo_to_mono_s32ne_neon(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+    for (; n >= 4; n -= 4) {
+        __asm__ __volatile__ (
+            "vld2.32    {q0,q1}, [%[src]]!      \n\t"
+            "vrhadd.s32 q0, q0, q1              \n\t"
+            "vst1.32    {q0}, [%[dst]]!         \n\t"
+            : [dst] "+r" (dst), [src] "+r" (src) /* output operands */
+            : /* input operands */
+            : "memory", "q0", "q1" /* clobber list */
+        );
+    }
+
+    for (; n > 0; n--) {
+        dst[0] = src[0]/2 + src[1]/2;
+        src += 2;
+        dst++;
+    }
+}
+
 static void remap_stereo_to_mono_s16ne_neon(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
     for (; n >= 8; n -= 8) {
         __asm__ __volatile__ (
@@ -322,7 +341,8 @@ static void remap_arrange_stereo_float32ne_neon(pa_remap_t *m, float *dst, const
     }
 }
 
-static void remap_arrange_ch2_ch4_float32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+/* Works for both S32NE and FLOAT32NE */
+static void remap_arrange_ch2_ch4_any32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     const uint8x8_t t0 = ((uint8x8_t *)m->state)[0];
     const uint8x8_t t1 = ((uint8x8_t *)m->state)[1];
 
@@ -365,39 +385,52 @@ static void init_remap_neon(pa_remap_t *m) {
     n_oc = m->o_ss.channels;
     n_ic = m->i_ss.channels;
 
+    /* We short-circuit remap function selection for S32NE in most
+     * cases as the corresponding generic C code is performing
+     * similarly or even better. However there are a few cases where
+     * there actually is a significant improvement from using
+     * hand-crafted NEON assembly so we cannot just bail out for S32NE
+     * here. */
     if (n_ic == 1 && n_oc == 2 &&
             m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {
+        if (m->format == PA_SAMPLE_S32NE)
+            return;
         if (arm_flags & PA_CPU_ARM_CORTEX_A8) {
 
             pa_log_info("Using ARM NEON/A8 mono to stereo remapping");
             pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon,
-                (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8);
+                NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8);
         }
         else {
             pa_log_info("Using ARM NEON mono to stereo remapping");
             pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon,
-                (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm);
+                NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm);
         }
     } else if (n_ic == 1 && n_oc == 4 &&
             m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
             m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) {
 
+        if (m->format == PA_SAMPLE_S32NE)
+            return;
         pa_log_info("Using ARM NEON mono to 4-channel remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_ch4_s16ne_neon,
-            (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon);
+            NULL, (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon);
     } else if (n_ic == 2 && n_oc == 1 &&
             m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
 
         pa_log_info("Using ARM NEON stereo to mono remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_neon,
+            (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_neon,
             (pa_do_remap_func_t) remap_stereo_to_mono_float32ne_neon);
     } else if (n_ic == 4 && n_oc == 1 &&
             m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
             m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) {
 
+        if (m->format == PA_SAMPLE_S32NE)
+            return;
         pa_log_info("Using ARM NEON 4-channel to mono remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_neon,
-            (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon);
+            NULL, (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon);
     } else if (pa_setup_remap_arrange(m, arrange) &&
         ((n_ic == 2 && n_oc == 2) ||
          (n_ic == 2 && n_oc == 4) ||
@@ -405,17 +438,22 @@ static void init_remap_neon(pa_remap_t *m) {
         unsigned o;
 
         if (n_ic == 2 && n_oc == 2) {
+            if (m->format == PA_SAMPLE_S32NE)
+                return;
             pa_log_info("Using NEON stereo arrange remapping");
             pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_neon,
-                (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon);
+                NULL, (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon);
         } else if (n_ic == 2 && n_oc == 4) {
             pa_log_info("Using NEON 2-channel to 4-channel arrange remapping");
             pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch2_ch4_s16ne_neon,
-                (pa_do_remap_func_t) remap_arrange_ch2_ch4_float32ne_neon);
+                (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon,
+                (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon);
         } else if (n_ic == 4 && n_oc == 4) {
+            if (m->format == PA_SAMPLE_S32NE)
+                return;
             pa_log_info("Using NEON 4-channel arrange remapping");
             pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_neon,
-                (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon);
+                NULL, (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon);
         }
 
         /* setup state */
@@ -436,6 +474,8 @@ static void init_remap_neon(pa_remap_t *m) {
             }
             break;
         }
+        case PA_SAMPLE_S32NE:
+                /* fall-through */
         case PA_SAMPLE_FLOAT32NE: {
             uint8x8_t *t = m->state = pa_xnew0(uint8x8_t, 2);
             for (o = 0; o < n_oc; o++) {
@@ -461,8 +501,11 @@ static void init_remap_neon(pa_remap_t *m) {
     } else if (n_ic == 4 && n_oc == 4) {
         unsigned i, o;
 
+        if (m->format == PA_SAMPLE_S32NE)
+            return;
         pa_log_info("Using ARM NEON 4-channel remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_s16ne_neon,
+            (pa_do_remap_func_t) NULL,
             (pa_do_remap_func_t) remap_ch4_float32ne_neon);
 
         /* setup state */


=====================================
src/pulsecore/remap_sse.c
=====================================
@@ -110,7 +110,8 @@ static void remap_mono_to_stereo_s16ne_sse2(pa_remap_t *m, int16_t *dst, const i
     );
 }
 
-static void remap_mono_to_stereo_float32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+/* Works for both S32NE and FLOAT32NE */
+static void remap_mono_to_stereo_any32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) {
     pa_reg_x86 temp, temp2;
 
     __asm__ __volatile__ (
@@ -134,7 +135,8 @@ static void init_remap_sse2(pa_remap_t *m) {
 
         pa_log_info("Using SSE2 mono to stereo remapping");
         pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_sse2,
-            (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_sse2);
+            (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2,
+            (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2);
     }
 }
 #endif /* defined (__i386__) || defined (__amd64__) */


=====================================
src/pulsecore/resampler.c
=====================================
@@ -286,6 +286,14 @@ static pa_sample_format_t choose_work_format(
                 work_format = a;
                 break;
             }
+            /* If both input and output are using S32NE and we don't
+             * need any resampling we can use S32NE directly, avoiding
+             * converting back and forth between S32NE and
+             * FLOAT32NE. */
+            if ((a == PA_SAMPLE_S32NE) && (b == PA_SAMPLE_S32NE)) {
+                work_format = PA_SAMPLE_S32NE;
+                break;
+            }
             /* Else fall through */
         case PA_RESAMPLER_PEAKS:
             /* PEAKS, COPY and TRIVIAL do not benefit from increased


=====================================
src/tests/cpu-remap-test.c
=====================================
@@ -141,6 +141,60 @@ static void run_remap_test_s16(
     }
 }
 
+
+static void run_remap_test_s32(
+        pa_remap_t *remap_func,
+        pa_remap_t *remap_orig,
+        int align,
+        bool correct,
+        bool perf) {
+
+    PA_DECLARE_ALIGNED(8, int32_t, out_buf_ref[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int32_t, out_buf[SAMPLES*8]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int32_t, in_buf[SAMPLES*8]);
+    int32_t *out, *out_ref;
+    int32_t *in;
+    unsigned n_ic = remap_func->i_ss.channels;
+    unsigned n_oc = remap_func->o_ss.channels;
+    unsigned i, nsamples;
+
+    pa_assert(n_ic >= 1 && n_ic <= 8);
+    pa_assert(n_oc >= 1 && n_oc <= 8);
+
+    /* Force sample alignment as requested */
+    out = out_buf + (8 - align);
+    out_ref = out_buf_ref + (8 - align);
+    in = in_buf + (8 - align);
+    nsamples = SAMPLES - (8 - align);
+
+    pa_random(in, nsamples * n_ic * sizeof(int32_t));
+
+    if (correct) {
+        remap_orig->do_remap(remap_orig, out_ref, in, nsamples);
+        remap_func->do_remap(remap_func, out, in, nsamples);
+
+        for (i = 0; i < nsamples * n_oc; i++) {
+            if (abs(out[i] - out_ref[i]) > 4) {
+                pa_log_debug("Correctness test failed: align=%d", align);
+                pa_log_debug("%d: %d != %d", i, out[i], out_ref[i]);
+                ck_abort();
+            }
+        }
+    }
+
+    if (perf) {
+        pa_log_debug("Testing remap performance with %d sample alignment", align);
+
+        PA_RUNTIME_TEST_RUN_START("func", TIMES, TIMES2) {
+            remap_func->do_remap(remap_func, out, in, nsamples);
+        } PA_RUNTIME_TEST_RUN_STOP
+
+        PA_RUNTIME_TEST_RUN_START("orig", TIMES, TIMES2) {
+            remap_orig->do_remap(remap_orig, out_ref, in, nsamples);
+        } PA_RUNTIME_TEST_RUN_STOP
+    }
+}
+
 static void setup_remap_channels(
     pa_remap_t *m,
     pa_sample_format_t f,
@@ -193,6 +247,12 @@ static void remap_test_channels(
         run_remap_test_float(remap_func, remap_orig, 2, true, false);
         run_remap_test_float(remap_func, remap_orig, 3, true, true);
         break;
+    case PA_SAMPLE_S32NE:
+        run_remap_test_s32(remap_func, remap_orig, 0, true, false);
+        run_remap_test_s32(remap_func, remap_orig, 1, true, false);
+        run_remap_test_s32(remap_func, remap_orig, 2, true, false);
+        run_remap_test_s32(remap_func, remap_orig, 3, true, true);
+        break;
     case PA_SAMPLE_S16NE:
         run_remap_test_s16(remap_func, remap_orig, 0, true, false);
         run_remap_test_s16(remap_func, remap_orig, 1, true, false);
@@ -212,7 +272,7 @@ static void remap_init_test_channels(
         unsigned out_channels,
         bool rearrange) {
 
-    pa_remap_t remap_orig, remap_func;
+    pa_remap_t remap_orig = {0}, remap_func = {0};
 
     setup_remap_channels(&remap_orig, f, in_channels, out_channels, rearrange);
     orig_init_func(&remap_orig);
@@ -251,6 +311,11 @@ START_TEST (remap_special_test) {
     pa_log_debug("Checking special remap (float, mono->4-channel)");
     remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 1, 4, false);
 
+    pa_log_debug("Checking special remap (s32, mono->stereo)");
+    remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 2, false);
+    pa_log_debug("Checking special remap (s32, mono->4-channel)");
+    remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 4, false);
+
     pa_log_debug("Checking special remap (s16, mono->stereo)");
     remap_init2_test_channels(PA_SAMPLE_S16NE, 1, 2, false);
     pa_log_debug("Checking special remap (s16, mono->4-channel)");
@@ -261,6 +326,11 @@ START_TEST (remap_special_test) {
     pa_log_debug("Checking special remap (float, 4-channel->mono)");
     remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 1, false);
 
+    pa_log_debug("Checking special remap (s32, stereo->mono)");
+    remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 1, false);
+    pa_log_debug("Checking special remap (s32, 4-channel->mono)");
+    remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 1, false);
+
     pa_log_debug("Checking special remap (s16, stereo->mono)");
     remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 1, false);
     pa_log_debug("Checking special remap (s16, 4-channel->mono)");
@@ -271,11 +341,15 @@ END_TEST
 START_TEST (rearrange_special_test) {
     pa_log_debug("Checking special remap (s16, stereo rearrange)");
     remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 2, true);
+    pa_log_debug("Checking special remap (s32, stereo rearrange)");
+    remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 2, true);
     pa_log_debug("Checking special remap (float, stereo rearrange)");
     remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 2, 2, true);
 
     pa_log_debug("Checking special remap (s16, 4-channel rearrange)");
     remap_init2_test_channels(PA_SAMPLE_S16NE, 4, 4, true);
+    pa_log_debug("Checking special remap (s32, 4-channel rearrange)");
+    remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 4, true);
     pa_log_debug("Checking special remap (float, 4-channel rearrange)");
     remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 4, true);
 }
@@ -298,6 +372,9 @@ START_TEST (remap_mmx_test) {
     init_func = pa_get_init_remap_func();
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false);
 
+    pa_log_debug("Checking MMX remap (s32, mono->stereo)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
+
     pa_log_debug("Checking MMX remap (s16, mono->stereo)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
 }
@@ -319,6 +396,9 @@ START_TEST (remap_sse2_test) {
     init_func = pa_get_init_remap_func();
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false);
 
+    pa_log_debug("Checking SSE2 remap (s32, mono->stereo)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
+
     pa_log_debug("Checking SSE2 remap (s16, mono->stereo)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
 }
@@ -345,6 +425,11 @@ START_TEST (remap_neon_test) {
     pa_log_debug("Checking NEON remap (float, mono->4-channel)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 4, false);
 
+    pa_log_debug("Checking NEON remap (s32, mono->stereo)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
+    pa_log_debug("Checking NEON remap (s32, mono->4-channel)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 4, false);
+
     pa_log_debug("Checking NEON remap (s16, mono->stereo)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
     pa_log_debug("Checking NEON remap (s16, mono->4-channel)");
@@ -355,6 +440,11 @@ START_TEST (remap_neon_test) {
     pa_log_debug("Checking NEON remap (float, 4-channel->mono)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 1, false);
 
+    pa_log_debug("Checking NEON remap (s32, stereo->mono)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 1, false);
+    pa_log_debug("Checking NEON remap (s32, 4-channel->mono)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 1, false);
+
     pa_log_debug("Checking NEON remap (s16, stereo->mono)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 1, false);
     pa_log_debug("Checking NEON remap (s16, 4-channel->mono)");
@@ -362,6 +452,8 @@ START_TEST (remap_neon_test) {
 
     pa_log_debug("Checking NEON remap (float, 4-channel->4-channel)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, false);
+    pa_log_debug("Checking NEON remap (s32, 4-channel->4-channel)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, false);
     pa_log_debug("Checking NEON remap (s16, 4-channel->4-channel)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, false);
 }
@@ -383,11 +475,22 @@ START_TEST (rearrange_neon_test) {
 
     pa_log_debug("Checking NEON remap (float, stereo rearrange)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 2, true);
+    pa_log_debug("Checking NEON remap (s32, stereo rearrange)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 2, true);
     pa_log_debug("Checking NEON remap (s16, stereo rearrange)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 2, true);
 
+    pa_log_debug("Checking NEON remap (float, 2-channel->4-channel rearrange)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 4, true);
+    pa_log_debug("Checking NEON remap (s32, 2-channel->4-channel rearrange)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 4, true);
+    pa_log_debug("Checking NEON remap (s16, 2-channel->4-channel rearrange)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 4, true);
+
     pa_log_debug("Checking NEON remap (float, 4-channel rearrange)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, true);
+    pa_log_debug("Checking NEON remap (s32, 4-channel rearrange)");
+    remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, true);
     pa_log_debug("Checking NEON remap (s16, 4-channel rearrange)");
     remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, true);
 }



View it on GitLab: https://gitlab.freedesktop.org/pulseaudio/pulseaudio/compare/993d3fd89e5611997f1e165bf03edefb0204b0a4...034b77823ad45b5f02baaeea436863ed104ee66d

-- 
View it on GitLab: https://gitlab.freedesktop.org/pulseaudio/pulseaudio/compare/993d3fd89e5611997f1e165bf03edefb0204b0a4...034b77823ad45b5f02baaeea436863ed104ee66d
You're receiving this email because of your account on gitlab.freedesktop.org.

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/pulseaudio-commits/attachments/20190329/adc6c547/attachment-0001.html>


More information about the pulseaudio-commits mailing list