[pulseaudio-commits] 23 commits - configure.ac src/.gitignore src/Makefile.am src/modules src/pulsecore src/tests

Fri Feb 15 13:25:26 PST 2013

configure.ac                                 |    4 
 src/.gitignore                               |    1 
 src/Makefile.am                              |   15 
 src/modules/echo-cancel/adrian-aec.c         |   32 -
 src/modules/echo-cancel/adrian-aec.h         |    3 
 src/modules/echo-cancel/adrian.c             |    6 
 src/modules/echo-cancel/adrian.h             |    1 
 src/modules/echo-cancel/echo-cancel.h        |    4 
 src/modules/echo-cancel/module-echo-cancel.c |   17 
 src/modules/echo-cancel/speex.c              |   12 
 src/modules/module-ladspa-sink.c             |    4 
 src/modules/module-virtual-source.c          |    1 
 src/pulsecore/cpu-arm.c                      |    4 
 src/pulsecore/cpu-arm.h                      |    1 
 src/pulsecore/mix.c                          |  725 +++++++++++++++++++++++++++
 src/pulsecore/mix.h                          |   64 ++
 src/pulsecore/mix_neon.c                     |   94 +++
 src/pulsecore/resampler.c                    |   44 +
 src/pulsecore/sample-util.c                  |  645 ------------------------
 src/pulsecore/sample-util.h                  |   48 -
 src/pulsecore/sink-input.c                   |    2 
 src/pulsecore/sink.c                         |    1 
 src/pulsecore/sound-file-stream.c            |    1 
 src/pulsecore/source-output.c                |    2 
 src/pulsecore/source.c                       |    2 
 src/pulsecore/svolume_c.c                    |   42 -
 src/tests/cpu-test.c                         |  160 +++++
 src/tests/mix-test.c                         |    1 
 src/tests/mult-s16-test.c                    |  145 +++++
 src/tests/volume-test.c                      |    1 
 30 files changed, 1321 insertions(+), 761 deletions(-)

New commits:
commit cf8e67ede8b6a905e2f742e5e92faaf060665d36
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:27:10 2013 +0100

    tests: Add NEON mix test cases to cpu-test
    
    on beagle-xm (Cortex-A8)
    
    Initialising ARM NEON optimized mixing functions.
    Checking NEON mix
    Testing 2-channel mixing performance with 7 sample alignment
    func: 2329073 usec (avg: 23290.7, min = 18127, max = 65368, stddev = 10404.2).
    orig: 7931126 usec (avg: 79311.3, min = 65002, max = 239411, stddev = 35885.6).
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/tests/cpu-test.c b/src/tests/cpu-test.c
index 16259de..aa4527a 100644
--- a/src/tests/cpu-test.c
+++ b/src/tests/cpu-test.c
@@ -34,6 +34,7 @@
 #include <pulsecore/sconv.h>
 #include <pulsecore/remap.h>
 #include <pulsecore/sample-util.h>
+#include <pulsecore/mix.h>
 
 #define PA_CPU_TEST_RUN_START(l, t1, t2)                        \
 {                                                               \
@@ -698,6 +699,156 @@ END_TEST
 #undef TIMES2
 /* End remap tests */
 
+/* Start mix tests */
+
+/* Only ARM NEON has mix tests, so disable the related functions for other
+ * architectures for now to avoid compiler warnings about unused functions. */
+#if defined (__arm__) && defined (__linux__)
+#ifdef HAVE_NEON
+
+#define SAMPLES 1028
+#define TIMES 1000
+#define TIMES2 100
+
+static void acquire_mix_streams(pa_mix_info streams[], unsigned nstreams) { /* Acquire every stream's chunk and store the returned pointer in .ptr. */
+    unsigned i;
+
+    for (i = 0; i < nstreams; i++)
+        streams[i].ptr = pa_memblock_acquire_chunk(&streams[i].chunk); /* callers in this file pair this with release_mix_streams() */
+}
+
+static void release_mix_streams(pa_mix_info streams[], unsigned nstreams) { /* Release the memblock behind every stream's chunk. */
+    unsigned i;
+
+    for (i = 0; i < nstreams; i++)
+        pa_memblock_release(streams[i].chunk.memblock); /* .ptr is left as is; it is reassigned by the next acquire */
+}
+
+static void run_mix_test( /* Mix two random s16 streams with func and orig_func; compare and/or time them. */
+        pa_do_mix_func_t func, /* mixer under test */
+        pa_do_mix_func_t orig_func, /* reference mixer */
+        int align, /* buffers start (8 - align) samples into the PA_DECLARE_ALIGNED(8, ...) arrays */
+        int channels, /* 1, 2 or 4 (asserted below) */
+        pa_bool_t correct, /* compare func's output with orig_func's output */
+        pa_bool_t perf) { /* run the timing loops for both mixers */
+
+    PA_DECLARE_ALIGNED(8, int16_t, in0[SAMPLES * 4]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int16_t, in1[SAMPLES * 4]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int16_t, out[SAMPLES * 4]) = { 0 };
+    PA_DECLARE_ALIGNED(8, int16_t, out_ref[SAMPLES * 4]) = { 0 };
+    int16_t *samples0, *samples1;
+    int16_t *samples, *samples_ref;
+    int nsamples;
+    pa_mempool *pool;
+    pa_memchunk c0, c1;
+    pa_mix_info m[2];
+    int i;
+
+    pa_assert(channels == 1 || channels == 2 || channels == 4);
+
+    /* Force sample alignment as requested */
+    samples0 = in0 + (8 - align);
+    samples1 = in1 + (8 - align);
+    samples = out + (8 - align);
+    samples_ref = out_ref + (8 - align);
+    nsamples = channels * (SAMPLES - (8 - align)); /* number of int16_t values, all channels together */
+
+    fail_unless((pool = pa_mempool_new(FALSE, 0)) != NULL, NULL);
+
+    pa_random(samples0, nsamples * sizeof(int16_t)); /* random input for stream 0 */
+    c0.memblock = pa_memblock_new_fixed(pool, samples0, nsamples * sizeof(int16_t), FALSE);
+    c0.length = pa_memblock_get_length(c0.memblock);
+    c0.index = 0;
+
+    pa_random(samples1, nsamples * sizeof(int16_t)); /* random input for stream 1 */
+    c1.memblock = pa_memblock_new_fixed(pool, samples1, nsamples * sizeof(int16_t), FALSE);
+    c1.length = pa_memblock_get_length(c1.memblock);
+    c1.index = 0;
+
+    m[0].chunk = c0;
+    m[0].volume.channels = channels;
+    for (i = 0; i < channels; i++) {
+        m[0].volume.values[i] = PA_VOLUME_NORM;
+        m[0].linear[i].i = 0x5555; /* fixed test gain for stream 0 */
+    }
+
+    m[1].chunk = c1;
+    m[1].volume.channels = channels;
+    for (i = 0; i < channels; i++) {
+        m[1].volume.values[i] = PA_VOLUME_NORM;
+        m[1].linear[i].i = 0x6789; /* fixed test gain for stream 1 */
+    }
+
+    if (correct) {
+        acquire_mix_streams(m, 2);
+        orig_func(m, 2, channels, samples_ref, nsamples * sizeof(int16_t)); /* reference result; the length argument is in bytes */
+        release_mix_streams(m, 2);
+
+        acquire_mix_streams(m, 2); /* re-acquire so both mixers start from fresh .ptr values */
+        func(m, 2, channels, samples, nsamples * sizeof(int16_t)); /* result under test */
+        release_mix_streams(m, 2);
+
+        for (i = 0; i < nsamples; i++) { /* every output sample has to match exactly */
+            if (samples[i] != samples_ref[i]) {
+                pa_log_debug("Correctness test failed: align=%d, channels=%d", align, channels);
+                pa_log_debug("%d: %hd != %04hd (%hd + %hd)\n",
+                    i,
+                    samples[i], samples_ref[i],
+                    samples0[i], samples1[i]);
+                fail();
+            }
+        }
+    }
+
+    if (perf) {
+        pa_log_debug("Testing %d-channel mixing performance with %d sample alignment", channels, align);
+
+        PA_CPU_TEST_RUN_START("func", TIMES, TIMES2) { /* timed body: mixer under test */
+            acquire_mix_streams(m, 2);
+            func(m, 2, channels, samples, nsamples * sizeof(int16_t));
+            release_mix_streams(m, 2);
+        } PA_CPU_TEST_RUN_STOP
+
+        PA_CPU_TEST_RUN_START("orig", TIMES, TIMES2) { /* timed body: reference mixer */
+            acquire_mix_streams(m, 2);
+            orig_func(m, 2, channels, samples_ref, nsamples * sizeof(int16_t));
+            release_mix_streams(m, 2);
+        } PA_CPU_TEST_RUN_STOP
+    }
+
+    pa_memblock_unref(c0.memblock); /* m[0].chunk and m[1].chunk are struct copies that refer to these same memblocks */
+    pa_memblock_unref(c1.memblock);
+
+    pa_mempool_free(pool);
+}
+#endif /* HAVE_NEON */
+#endif /* defined (__arm__) && defined (__linux__) */
+
+#if defined (__arm__) && defined (__linux__)
+#ifdef HAVE_NEON
+START_TEST (mix_neon_test) { /* Check the NEON s16ne mixer against the mixer that was installed before it. */
+    pa_do_mix_func_t orig_func, neon_func;
+    pa_cpu_arm_flag_t flags = 0;
+
+    pa_cpu_get_arm_flags(&flags);
+
+    if (!(flags & PA_CPU_ARM_NEON)) { /* nothing to test when the CPU flags lack NEON */
+        pa_log_info("NEON not supported. Skipping");
+        return;
+    }
+
+    orig_func = pa_get_mix_func(PA_SAMPLE_S16NE); /* fetched before the init call below replaces the table entry */
+    pa_mix_func_init_neon(flags);
+    neon_func = pa_get_mix_func(PA_SAMPLE_S16NE);
+
+    pa_log_debug("Checking NEON mix");
+    run_mix_test(neon_func, orig_func, 7, 2, TRUE, TRUE); /* align=7, channels=2, correctness and timing */
+}
+END_TEST
+#endif /* HAVE_NEON */
+#endif /* defined (__arm__) && defined (__linux__) */
+/* End mix tests */
+
 int main(int argc, char *argv[]) {
     int failed = 0;
     Suite *s;
@@ -744,6 +895,15 @@ int main(int argc, char *argv[]) {
 #endif
     tcase_set_timeout(tc, 120);
     suite_add_tcase(s, tc);
+    /* Mix tests */
+    tc = tcase_create("mix");
+#if defined (__arm__) && defined (__linux__)
+#if HAVE_NEON
+    tcase_add_test(tc, mix_neon_test);
+#endif
+#endif
+    tcase_set_timeout(tc, 120);
+    suite_add_tcase(s, tc);
 
     sr = srunner_create(s);
     srunner_run_all(sr, CK_NORMAL);

commit 1e4e586150b78e1d3999b9bcafecf34363ffff97
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:27:09 2013 +0100

    mix: Add optimized mix code path for ARM NEON
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/Makefile.am b/src/Makefile.am
index e551810..cf643da 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -889,10 +889,12 @@ libpulsecore_@PA_MAJORMINOR@_la_LDFLAGS = $(AM_LDFLAGS) -avoid-version
 libpulsecore_@PA_MAJORMINOR@_la_LIBADD = $(AM_LIBADD) $(LIBLTDL) $(LIBSAMPLERATE_LIBS) $(LIBSPEEX_LIBS) $(LIBSNDFILE_LIBS) $(WINSOCK_LIBS) $(LTLIBICONV) libpulsecommon-@PA_MAJORMINOR@.la libpulse.la libpulsecore-foreign.la
 
 if HAVE_NEON
-noinst_LTLIBRARIES += libpulsecore_sconv_neon.la
+noinst_LTLIBRARIES += libpulsecore_sconv_neon.la libpulsecore_mix_neon.la
 libpulsecore_sconv_neon_la_SOURCES = pulsecore/sconv_neon.c
 libpulsecore_sconv_neon_la_CFLAGS = $(AM_CFLAGS) $(NEON_CFLAGS)
-libpulsecore_@PA_MAJORMINOR@_la_LIBADD += libpulsecore_sconv_neon.la
+libpulsecore_mix_neon_la_SOURCES = pulsecore/mix_neon.c
+libpulsecore_mix_neon_la_CFLAGS = $(AM_CFLAGS) $(NEON_CFLAGS)
+libpulsecore_@PA_MAJORMINOR@_la_LIBADD += libpulsecore_sconv_neon.la libpulsecore_mix_neon.la
 endif
 
 if HAVE_ORC
diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c
index 05668f1..1378124 100644
--- a/src/pulsecore/cpu-arm.c
+++ b/src/pulsecore/cpu-arm.c
@@ -143,8 +143,10 @@ pa_bool_t pa_cpu_init_arm(pa_cpu_arm_flag_t *flags) {
     if (*flags & PA_CPU_ARM_V6)
         pa_volume_func_init_arm(*flags);
 #ifdef HAVE_NEON
-    if (*flags & PA_CPU_ARM_NEON)
+    if (*flags & PA_CPU_ARM_NEON) {
         pa_convert_func_init_neon(*flags);
+        pa_mix_func_init_neon(*flags);
+    }
 #endif
 
     return TRUE;
diff --git a/src/pulsecore/cpu-arm.h b/src/pulsecore/cpu-arm.h
index d2d3f5c..d9dc3d5 100644
--- a/src/pulsecore/cpu-arm.h
+++ b/src/pulsecore/cpu-arm.h
@@ -47,6 +47,7 @@ void pa_volume_func_init_arm(pa_cpu_arm_flag_t flags);
 
 #ifdef HAVE_NEON
 void pa_convert_func_init_neon(pa_cpu_arm_flag_t flags);
+void pa_mix_func_init_neon(pa_cpu_arm_flag_t flags);
 #endif
 
 #endif /* foocpuarmhfoo */
diff --git a/src/pulsecore/mix_neon.c b/src/pulsecore/mix_neon.c
new file mode 100644
index 0000000..ff05ccf
--- /dev/null
+++ b/src/pulsecore/mix_neon.c
@@ -0,0 +1,94 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2013 Peter Meerwald <pmeerw at pmeerw.net>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <pulsecore/macro.h>
+#include <pulsecore/endianmacros.h>
+
+#include "cpu-arm.h"
+#include "mix.h"
+
+#include <arm_neon.h>
+
+static pa_do_mix_func_t fallback;
+
+/* special case: mix s16ne streams, 2 channels each */
+static void pa_mix_ch2_s16ne_neon(pa_mix_info streams[], unsigned nstreams, uint8_t *data, unsigned length) { /* Mix nstreams 2-channel s16 streams into data; length is in bytes and each streams[i].ptr is advanced. */
+    const unsigned mask = sizeof(int16_t) * 8 - 1; /* 15: one loop iteration produces 16 output bytes */
+    const uint8_t *end = data + (length & ~mask); /* end of the part that is a whole number of 16-byte groups */
+
+    while (data < end) {
+        int32x4_t sum0, sum1; /* 32-bit accumulators, one vector per channel */
+        unsigned i;
+
+        __asm__ __volatile__ (
+            "veor.s32 %q[sum0], %q[sum0]     \n\t" /* sum0 ^= sum0, i.e. clear the accumulator */
+            "veor.s32 %q[sum1], %q[sum1]     \n\t" /* same for sum1 */
+            : [sum0] "=w" (sum0), [sum1] "=w" (sum1)
+            :
+            : "cc" /* clobber list */
+        );
+
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv0 = m->linear[0].i; /* integer volume factor, first channel */
+            int32_t cv1 = m->linear[1].i; /* integer volume factor, second channel */
+
+            __asm__ __volatile__ (
+                "vld2.s16    {d0,d2}, [%[ptr]]!      \n\t" /* de-interleaving load of 4 frames: d0 = first channel, d2 = second channel; ptr += 16 */
+                "vmov.s32    d4[0], %[cv0]           \n\t" /* d4 = { cv0, cv1 } */
+                "vmov.s32    d4[1], %[cv1]           \n\t"
+                "vshll.s16   q0, d0, #15             \n\t" /* widen samples to 32 bit, shifted left by 15 */
+                "vshll.s16   q1, d2, #15             \n\t"
+                "vqdmulh.s32 q0, q0, d4[0]           \n\t" /* saturating doubling multiply, high half: with the shift above this is (sample * cv0) >> 16 */
+                "vqdmulh.s32 q1, q1, d4[1]           \n\t"
+                "vqadd.s32   %q[sum0], %q[sum0], q0  \n\t" /* saturating accumulate per channel */
+                "vqadd.s32   %q[sum1], %q[sum1], q1  \n\t"
+                : [ptr] "+r" (m->ptr), [sum0] "+w" (sum0), [sum1] "+w" (sum1)
+                : [cv0] "r" (cv0), [cv1] "r" (cv1)
+                : "memory", "cc", "q0", "q1", "d4" /* clobber list */
+            );
+        }
+
+        __asm__ __volatile__ (
+            "vqmovn.s32 d0, %q[sum0]         \n\t" /* saturating narrow of the sums back to 16 bit */
+            "vqmovn.s32 d1, %q[sum1]         \n\t"
+            "vst2.s16   {d0,d1}, [%[data]]!  \n\t" /* interleaving store of 4 frames; data += 16 */
+            : [data] "+r" (data)
+            : [sum0] "w" (sum0), [sum1] "w" (sum1)
+            : "memory", "cc", "q0" /* clobber list */
+        );
+    }
+
+    fallback(streams, nstreams, 2, data, length & mask); /* the remaining (length & 15) bytes go through the previously installed mixer */
+}
+
+static void pa_mix_s16ne_neon(pa_mix_info streams[], unsigned nstreams, unsigned nchannels, void *data, unsigned length) { /* s16ne mix entry point installed by pa_mix_func_init_neon(). */
+    if (nchannels == 2) /* only the 2-channel case has a NEON implementation */
+        pa_mix_ch2_s16ne_neon(streams, nstreams, data, length);
+    else
+        fallback(streams, nstreams, nchannels, data, length); /* every other channel count uses the previously installed mixer */
+}
+
+void pa_mix_func_init_neon(pa_cpu_arm_flag_t flags) { /* Install the NEON s16ne mixer and remember the previous one as fallback; flags is not inspected here. */
+    pa_log_info("Initialising ARM NEON optimized mixing functions.");
+    if (pa_get_mix_func(PA_SAMPLE_S16NE) != (pa_do_mix_func_t) pa_mix_s16ne_neon) /* on a repeated call keep the old fallback, else it would point at the NEON mixer itself and recurse forever */
+        fallback = pa_get_mix_func(PA_SAMPLE_S16NE);
+    pa_set_mix_func(PA_SAMPLE_S16NE, (pa_do_mix_func_t) pa_mix_s16ne_neon);
+}

commit 7758076d9c73f4fead3e643004286f4129a10daf
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:27:08 2013 +0100

    mix: Change end pointer to length parameter in mixing function
    
    similar to volume functions, simplifies leftover samples handling
    for SIMD'd code path
    
    use concrete pointer type (e.g. int16_t*) instead of void*,
    saves several casts
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index 38d3b1e..786ed75 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -123,14 +123,16 @@ static const pa_calc_stream_volumes_func_t calc_stream_volumes_table[] = {
 };
 
 /* special case: mix 2 s16ne streams, 1 channel each */
-static void pa_mix2_ch1_s16ne(pa_mix_info streams[], int16_t *data, int16_t *end) {
+static void pa_mix2_ch1_s16ne(pa_mix_info streams[], int16_t *data, unsigned length) {
     const int16_t *ptr0 = streams[0].ptr;
     const int16_t *ptr1 = streams[1].ptr;
 
     const int32_t cv0 = streams[0].linear[0].i;
     const int32_t cv1 = streams[1].linear[0].i;
 
-    while (data < end) {
+    length /= sizeof(int16_t);
+
+    for (; length > 0; length--) {
         int32_t sum;
 
         sum = pa_mult_s16_volume(*ptr0++, cv0);
@@ -142,11 +144,13 @@ static void pa_mix2_ch1_s16ne(pa_mix_info streams[], int16_t *data, int16_t *end
 }
 
 /* special case: mix 2 s16ne streams, 2 channels each */
-static void pa_mix2_ch2_s16ne(pa_mix_info streams[], int16_t *data, int16_t *end) {
+static void pa_mix2_ch2_s16ne(pa_mix_info streams[], int16_t *data, unsigned length) {
     const int16_t *ptr0 = streams[0].ptr;
     const int16_t *ptr1 = streams[1].ptr;
 
-    while (data < end) {
+    length /= sizeof(int16_t) * 2;
+
+    for (; length > 0; length--) {
         int32_t sum;
 
         sum = pa_mult_s16_volume(*ptr0++, streams[0].linear[0].i);
@@ -164,12 +168,14 @@ static void pa_mix2_ch2_s16ne(pa_mix_info streams[], int16_t *data, int16_t *end
 }
 
 /* special case: mix 2 s16ne streams */
-static void pa_mix2_s16ne(pa_mix_info streams[], unsigned channels, int16_t *data, int16_t *end) {
+static void pa_mix2_s16ne(pa_mix_info streams[], unsigned channels, int16_t *data, unsigned length) {
     const int16_t *ptr0 = streams[0].ptr;
     const int16_t *ptr1 = streams[1].ptr;
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(int16_t);
+
+    for (; length > 0; length--) {
         int32_t sum;
 
         sum = pa_mult_s16_volume(*ptr0++, streams[0].linear[channel].i);
@@ -184,8 +190,11 @@ static void pa_mix2_s16ne(pa_mix_info streams[], unsigned channels, int16_t *dat
 }
 
 /* special case: mix s16ne streams, 2 channels each */
-static void pa_mix_ch2_s16ne(pa_mix_info streams[], unsigned nstreams, int16_t *data, int16_t *end) {
-    while (data < end) {
+static void pa_mix_ch2_s16ne(pa_mix_info streams[], unsigned nstreams, int16_t *data, unsigned length) {
+
+    length /= sizeof(int16_t) * 2;
+
+    for (; length > 0; length--) {
         int32_t sum0 = 0, sum1 = 0;
         unsigned i;
 
@@ -206,10 +215,12 @@ static void pa_mix_ch2_s16ne(pa_mix_info streams[], unsigned nstreams, int16_t *
     }
 }
 
-static void pa_mix_generic_s16ne(pa_mix_info streams[], unsigned nstreams, unsigned channels, int16_t *data, int16_t *end) {
+static void pa_mix_generic_s16ne(pa_mix_info streams[], unsigned nstreams, unsigned channels, int16_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(int16_t);
+
+    for (; length > 0; length--) {
         int32_t sum = 0;
         unsigned i;
 
@@ -223,32 +234,32 @@ static void pa_mix_generic_s16ne(pa_mix_info streams[], unsigned nstreams, unsig
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-        *data = sum;
-
-        data++;
+        *data++ = sum;
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, int16_t *data, unsigned length) { /* Dispatch to the most specific s16ne mixer; length is in bytes. */
     if (nstreams == 2 && channels == 1)
-        pa_mix2_ch1_s16ne(streams, data, end);
+        pa_mix2_ch1_s16ne(streams, data, length);
     else if (nstreams == 2 && channels == 2)
-        pa_mix2_ch2_s16ne(streams, data, end);
+        pa_mix2_ch2_s16ne(streams, data, length);
     else if (nstreams == 2)
-        pa_mix2_s16ne(streams, channels, data, end);
+        pa_mix2_s16ne(streams, channels, data, length);
     else if (channels == 2)
-        pa_mix_ch2_s16ne(streams, channels, data, end);
+        pa_mix_ch2_s16ne(streams, nstreams, data, length); /* second argument is the stream count; passing channels mixed only two streams and read streams[1] when nstreams == 1 */
     else
-        pa_mix_generic_s16ne(streams, nstreams, channels, data, end);
+        pa_mix_generic_s16ne(streams, nstreams, channels, data, length);
 }
 
-static void pa_mix_s16re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s16re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, int16_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(int16_t);
+
+    for (; length > 0; length--, data++) {
         int32_t sum = 0;
         unsigned i;
 
@@ -262,19 +273,19 @@ static void pa_mix_s16re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-        *((int16_t*) data) = PA_INT16_SWAP((int16_t) sum);
-
-        data = (uint8_t*) data + sizeof(int16_t);
+        *data = PA_INT16_SWAP((int16_t) sum);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_s32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, int32_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(int32_t);
+
+    for (; length > 0; length--, data++) {
         int64_t sum = 0;
         unsigned i;
 
@@ -292,19 +303,19 @@ static void pa_mix_s32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-        *((int32_t*) data) = (int32_t) sum;
-
-        data = (uint8_t*) data + sizeof(int32_t);
+        *data = (int32_t) sum;
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_s32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, int32_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(int32_t);
+
+    for (; length > 0; length--, data++) {
         int64_t sum = 0;
         unsigned i;
 
@@ -322,19 +333,17 @@ static void pa_mix_s32re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-        *((int32_t*) data) = PA_INT32_SWAP((int32_t) sum);
-
-        data = (uint8_t*) data + sizeof(int32_t);
+        *data = PA_INT32_SWAP((int32_t) sum);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_s24ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s24ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, uint8_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    for (; length > 0; length -= 3, data += 3) {
         int64_t sum = 0;
         unsigned i;
 
@@ -354,17 +363,15 @@ static void pa_mix_s24ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
         sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
         PA_WRITE24NE(data, ((uint32_t) sum) >> 8);
 
-        data = (uint8_t*) data + 3;
-
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_s24re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s24re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, uint8_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    for (; length > 0; length -= 3, data += 3) {
         int64_t sum = 0;
         unsigned i;
 
@@ -384,17 +391,17 @@ static void pa_mix_s24re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
         sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
         PA_WRITE24RE(data, ((uint32_t) sum) >> 8);
 
-        data = (uint8_t*) data + 3;
-
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_s24_32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s24_32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, uint32_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(uint32_t);
+
+    for (; length > 0; length--, data++) {
         int64_t sum = 0;
         unsigned i;
 
@@ -412,19 +419,19 @@ static void pa_mix_s24_32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-        *((uint32_t*) data) = ((uint32_t) (int32_t) sum) >> 8;
-
-        data = (uint8_t*) data + sizeof(uint32_t);
+        *data = ((uint32_t) (int32_t) sum) >> 8;
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_s24_32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_s24_32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, uint32_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(uint32_t);
+
+    for (; length > 0; length--, data++) {
         int64_t sum = 0;
         unsigned i;
 
@@ -442,19 +449,19 @@ static void pa_mix_s24_32re_c(pa_mix_info streams[], unsigned nstreams, unsigned
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-        *((uint32_t*) data) = PA_INT32_SWAP(((uint32_t) (int32_t) sum) >> 8);
-
-        data = (uint8_t*) data + sizeof(uint32_t);
+        *data = PA_INT32_SWAP(((uint32_t) (int32_t) sum) >> 8);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_u8_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_u8_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, uint8_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(uint8_t);
+
+    for (; length > 0; length--, data++) {
         int32_t sum = 0;
         unsigned i;
 
@@ -471,19 +478,19 @@ static void pa_mix_u8_c(pa_mix_info streams[], unsigned nstreams, unsigned chann
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x80, 0x7F);
-        *((uint8_t*) data) = (uint8_t) (sum + 0x80);
-
-        data = (uint8_t*) data + 1;
+        *data = (uint8_t) (sum + 0x80);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_ulaw_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_ulaw_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, uint8_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(uint8_t);
+
+    for (; length > 0; length--, data++) {
         int32_t sum = 0;
         unsigned i;
 
@@ -497,19 +504,19 @@ static void pa_mix_ulaw_c(pa_mix_info streams[], unsigned nstreams, unsigned cha
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-        *((uint8_t*) data) = (uint8_t) st_14linear2ulaw((int16_t) sum >> 2);
-
-        data = (uint8_t*) data + 1;
+        *data = (uint8_t) st_14linear2ulaw((int16_t) sum >> 2);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_alaw_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_alaw_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, uint8_t *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(uint8_t);
+
+    for (; length > 0; length--, data++) {
         int32_t sum = 0;
         unsigned i;
 
@@ -523,19 +530,19 @@ static void pa_mix_alaw_c(pa_mix_info streams[], unsigned nstreams, unsigned cha
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-        *((uint8_t*) data) = (uint8_t) st_13linear2alaw((int16_t) sum >> 3);
-
-        data = (uint8_t*) data + 1;
+        *data = (uint8_t) st_13linear2alaw((int16_t) sum >> 3);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_float32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_float32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, float *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(float);
+
+    for (; length > 0; length--, data++) {
         float sum = 0;
         unsigned i;
 
@@ -551,19 +558,19 @@ static void pa_mix_float32ne_c(pa_mix_info streams[], unsigned nstreams, unsigne
             m->ptr = (uint8_t*) m->ptr + sizeof(float);
         }
 
-        *((float*) data) = sum;
-
-        data = (uint8_t*) data + sizeof(float);
+        *data = sum;
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
-static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, float *data, unsigned length) {
     unsigned channel = 0;
 
-    while (data < end) {
+    length /= sizeof(float);
+
+    for (; length > 0; length--, data++) {
         float sum = 0;
         unsigned i;
 
@@ -579,9 +586,7 @@ static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigne
             m->ptr = (uint8_t*) m->ptr + sizeof(float);
         }
 
-        *((float*) data) = PA_FLOAT32_SWAP(sum);
-
-        data = (uint8_t*) data + sizeof(float);
+        *data = PA_FLOAT32_SWAP(sum);
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
@@ -615,7 +620,6 @@ size_t pa_mix(
 
     pa_cvolume full_volume;
     unsigned k;
-    void *end;
 
     pa_assert(streams);
     pa_assert(data);
@@ -636,10 +640,8 @@ size_t pa_mix(
             length = streams[k].chunk.length;
     }
 
-    end = (uint8_t*) data + length;
-
     calc_stream_volumes_table[spec->format](streams, nstreams, volume, spec);
-    do_mix_table[spec->format](streams, nstreams, spec->channels, data, end);
+    do_mix_table[spec->format](streams, nstreams, spec->channels, data, length);
 
     for (k = 0; k < nstreams; k++)
         pa_memblock_release(streams[k].chunk.memblock);
diff --git a/src/pulsecore/mix.h b/src/pulsecore/mix.h
index 27198fc..e90652a 100644
--- a/src/pulsecore/mix.h
+++ b/src/pulsecore/mix.h
@@ -51,7 +51,7 @@ size_t pa_mix(
     const pa_cvolume *volume,
     pa_bool_t mute);
 
-typedef void (*pa_do_mix_func_t) (pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end);
+typedef void (*pa_do_mix_func_t) (pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, unsigned length);
 
 pa_do_mix_func_t pa_get_mix_func(pa_sample_format_t f);
 void pa_set_mix_func(pa_sample_format_t f, pa_do_mix_func_t func);

commit c1cac8d82bedeb771bb2791b9297e69ce0290736
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:07 2013 +0100

    mix: Add special cases for mixing streams in s16ne format
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index b2bbeb7..38d3b1e 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -122,7 +122,91 @@ static const pa_calc_stream_volumes_func_t calc_stream_volumes_table[] = {
   [PA_SAMPLE_S24_32BE]  = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes
 };
 
-static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+/* special case: mix 2 s16ne streams, 1 channel each */
+static void pa_mix2_ch1_s16ne(pa_mix_info streams[], int16_t *data, int16_t *end) {
+    const int16_t *ptr0 = streams[0].ptr;
+    const int16_t *ptr1 = streams[1].ptr;
+
+    const int32_t cv0 = streams[0].linear[0].i;
+    const int32_t cv1 = streams[1].linear[0].i;
+
+    while (data < end) {
+        int32_t sum;
+
+        sum = pa_mult_s16_volume(*ptr0++, cv0);
+        sum += pa_mult_s16_volume(*ptr1++, cv1);
+
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *data++ = sum;
+    }
+}
+
+/* special case: mix 2 s16ne streams, 2 channels each */
+static void pa_mix2_ch2_s16ne(pa_mix_info streams[], int16_t *data, int16_t *end) {
+    const int16_t *ptr0 = streams[0].ptr;
+    const int16_t *ptr1 = streams[1].ptr;
+
+    while (data < end) {
+        int32_t sum;
+
+        sum = pa_mult_s16_volume(*ptr0++, streams[0].linear[0].i);
+        sum += pa_mult_s16_volume(*ptr1++, streams[1].linear[0].i);
+
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *data++ = sum;
+
+        sum = pa_mult_s16_volume(*ptr0++, streams[0].linear[1].i);
+        sum += pa_mult_s16_volume(*ptr1++, streams[1].linear[1].i);
+
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *data++ = sum;
+    }
+}
+
+/* special case: mix 2 s16ne streams */
+static void pa_mix2_s16ne(pa_mix_info streams[], unsigned channels, int16_t *data, int16_t *end) {
+    const int16_t *ptr0 = streams[0].ptr;
+    const int16_t *ptr1 = streams[1].ptr;
+    unsigned channel = 0;
+
+    while (data < end) {
+        int32_t sum;
+
+        sum = pa_mult_s16_volume(*ptr0++, streams[0].linear[channel].i);
+        sum += pa_mult_s16_volume(*ptr1++, streams[1].linear[channel].i);
+
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *data++ = sum;
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
+
+/* special case: mix s16ne streams, 2 channels each */
+static void pa_mix_ch2_s16ne(pa_mix_info streams[], unsigned nstreams, int16_t *data, int16_t *end) {
+    while (data < end) {
+        int32_t sum0 = 0, sum1 = 0;
+        unsigned i;
+
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv0 = m->linear[0].i;
+            int32_t cv1 = m->linear[1].i;
+
+            sum0 += pa_mult_s16_volume(*((int16_t*) m->ptr), cv0);
+            m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
+
+            sum1 += pa_mult_s16_volume(*((int16_t*) m->ptr), cv1);
+            m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
+        }
+
+        *data++ = PA_CLAMP_UNLIKELY(sum0, -0x8000, 0x7FFF);
+        *data++ = PA_CLAMP_UNLIKELY(sum1, -0x8000, 0x7FFF);
+    }
+}
+
+static void pa_mix_generic_s16ne(pa_mix_info streams[], unsigned nstreams, unsigned channels, int16_t *data, int16_t *end) {
     unsigned channel = 0;
 
     while (data < end) {
@@ -134,20 +218,33 @@ static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
             int32_t cv = m->linear[channel].i;
 
             if (PA_LIKELY(cv > 0))
-                sum += pa_mult_s16_volume(*((int16_t*) m->ptr), m->linear[channel].i);
+                sum += pa_mult_s16_volume(*((int16_t*) m->ptr), cv);
             m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
         }
 
         sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-        *((int16_t*) data) = (int16_t) sum;
+        *data = sum;
 
-        data = (uint8_t*) data + sizeof(int16_t);
+        data++;
 
         if (PA_UNLIKELY(++channel >= channels))
             channel = 0;
     }
 }
 
+static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) { /* dispatch on stream/channel count */
+    if (nstreams == 2 && channels == 1)
+        pa_mix2_ch1_s16ne(streams, data, end);
+    else if (nstreams == 2 && channels == 2)
+        pa_mix2_ch2_s16ne(streams, data, end);
+    else if (nstreams == 2)
+        pa_mix2_s16ne(streams, channels, data, end);
+    else if (channels == 2)
+        pa_mix_ch2_s16ne(streams, nstreams, data, end); /* 2nd argument is the stream count, not the channel count */
+    else
+        pa_mix_generic_s16ne(streams, nstreams, channels, data, end);
+}
+
 static void pa_mix_s16re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
     unsigned channel = 0;
 

commit c23963a99f569786bb0c1efe712f62f0475552ae
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:06 2013 +0100

    tests: Add mult-s16 test
    
    test mostly compares runtime of 64 bit vs 32 bit s16ne-by-volume multiplication
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/.gitignore b/src/.gitignore
index ad8773b..a09e268 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -71,3 +71,4 @@ thread-test
 usergroup-test
 utf8-test
 volume-test
+mult-s16-test
diff --git a/src/Makefile.am b/src/Makefile.am
index 22b0409..e551810 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -245,7 +245,8 @@ TESTS_default = \
 		mix-test \
 		proplist-test \
 		cpu-test \
-		lock-autospawn-test
+		lock-autospawn-test \
+		mult-s16-test
 
 TESTS_norun = \
 		mcalign-test \
@@ -503,6 +504,11 @@ cpu_test_LDADD = $(AM_LDADD) libpulsecore- at PA_MAJORMINOR@.la libpulse.la libpuls
 cpu_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
 cpu_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
 
+mult_s16_test_SOURCES = tests/mult-s16-test.c
+mult_s16_test_LDADD = $(AM_LDADD) libpulsecore- at PA_MAJORMINOR@.la libpulse.la libpulsecommon- at PA_MAJORMINOR@.la
+mult_s16_test_CFLAGS = $(AM_CFLAGS) $(LIBCHECK_CFLAGS)
+mult_s16_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(LIBCHECK_LIBS)
+
 rtstutter_SOURCES = tests/rtstutter.c
 rtstutter_LDADD = $(AM_LDADD) libpulsecore- at PA_MAJORMINOR@.la libpulse.la libpulsecommon- at PA_MAJORMINOR@.la
 rtstutter_CFLAGS = $(AM_CFLAGS)
diff --git a/src/tests/mult-s16-test.c b/src/tests/mult-s16-test.c
new file mode 100644
index 0000000..15ed8f2
--- /dev/null
+++ b/src/tests/mult-s16-test.c
@@ -0,0 +1,145 @@
+/***
+  This file is part of PulseAudio.
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <check.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <pulse/rtclock.h>
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+
+#define PA_CPU_TEST_RUN_START(l, t1, t2)                        \
+{                                                               \
+    int _j, _k;                                                 \
+    int _times = (t1), _times2 = (t2);                          \
+    pa_usec_t _start, _stop;                                    \
+    pa_usec_t _min = INT_MAX, _max = 0;                         \
+    double _s1 = 0, _s2 = 0;                                    \
+    const char *_label = (l);                                   \
+                                                                \
+    for (_k = 0; _k < _times2; _k++) {                          \
+        _start = pa_rtclock_now();                              \
+        for (_j = 0; _j < _times; _j++)
+
+#define PA_CPU_TEST_RUN_STOP                                    \
+        _stop = pa_rtclock_now();                               \
+                                                                \
+        if (_min > (_stop - _start)) _min = _stop - _start;     \
+        if (_max < (_stop - _start)) _max = _stop - _start;     \
+        _s1 += _stop - _start;                                  \
+        _s2 += (_stop - _start) * (_stop - _start);             \
+    }                                                           \
+    pa_log_debug("%s: %llu usec (avg: %g, min = %llu, max = %llu, stddev = %g).", _label, \
+            (long long unsigned int)_s1,                        \
+            ((double)_s1 / _times2),                            \
+            (long long unsigned int)_min,                       \
+            (long long unsigned int)_max,                       \
+            sqrt(_times2 * _s2 - _s1 * _s1) / _times2);         \
+}
+
+static inline int32_t pa_mult_s16_volume_32(int16_t v, int32_t cv) {
+    /* Multiplying the 32 bit volume factor with the
+     * 16 bit sample might result in a 48 bit value. We
+     * want to do without 64 bit integers and hence do
+     * the multiplication independently for the HI and
+     * LO part of the volume. */
+    int32_t hi = cv >> 16;
+    int32_t lo = cv & 0xFFFF;
+    return ((v * lo) >> 16) + (v * hi);
+}
+
+static inline int32_t pa_mult_s16_volume_64(int16_t v, int32_t cv) {
+    /* Multiply with 64 bit integers on 64 bit platforms */
+    return (v * (int64_t) cv) >> 16;
+}
+
+#define SAMPLES 1028
+#define TIMES 10000
+#define TIMES2 100
+
+START_TEST (mult_s16_test) {
+    int16_t samples[SAMPLES];
+    int32_t volumes[SAMPLES];
+    int32_t sum1 = 0, sum2 = 0;
+    int i;
+
+    pa_random(samples, sizeof(samples));
+    pa_random(volumes, sizeof(volumes));
+
+    for (i = 0; i < SAMPLES; i++) {
+        int32_t a = pa_mult_s16_volume_32(samples[i], volumes[i]);
+        int32_t b = pa_mult_s16_volume_64(samples[i], volumes[i]);
+
+        if (a != b) {
+            pa_log_debug("%d: %d != %d", i, a, b);
+            fail();
+        }
+    }
+
+    PA_CPU_TEST_RUN_START("32 bit mult", TIMES, TIMES2) {
+        for (i = 0; i < SAMPLES; i++) {
+            sum1 += pa_mult_s16_volume_32(samples[i], volumes[i]);
+        }
+    } PA_CPU_TEST_RUN_STOP
+
+    PA_CPU_TEST_RUN_START("64 bit mult", TIMES, TIMES2) {
+        for (i = 0; i < SAMPLES; i++)
+            sum2 += pa_mult_s16_volume_64(samples[i], volumes[i]);
+    } PA_CPU_TEST_RUN_STOP
+
+    fail_unless(sum1 == sum2);
+}
+END_TEST
+
+int main(int argc, char *argv[]) {
+    int failed = 0;
+    Suite *s;
+    TCase *tc;
+    SRunner *sr;
+
+    if (!getenv("MAKE_CHECK"))
+        pa_log_set_level(PA_LOG_DEBUG);
+
+#if __WORDSIZE == 64 || ((ULONG_MAX) > (UINT_MAX))
+    pa_log_debug("This seems to be 64-bit code.");
+#elif  __WORDSIZE == 32
+    pa_log_debug("This seems to be 32-bit code.");
+#else
+    pa_log_debug("Don't know if this is 32- or 64-bit code.");
+#endif
+
+    s = suite_create("Mult-s16");
+    tc = tcase_create("mult-s16");
+    tcase_add_test(tc, mult_s16_test);
+    tcase_set_timeout(tc, 120);
+    suite_add_tcase(s, tc);
+
+    sr = srunner_create(s);
+    srunner_run_all(sr, CK_NORMAL);
+    failed = srunner_ntests_failed(sr);
+    srunner_free(sr);
+
+    return (failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
+}

commit 8fa81a93c984523bf2bd5ad4b079bce8d14a9a4e
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:05 2013 +0100

    core: Refactor code to multiply s16 by volume
    
    move code to function pa_mult_s16_volume() in sample-util.h
    use 64 bit integers on 64 bit platforms (it's faster)
    
    on i5, 2.5GHz (64-bit)
    
    Running suite(s): Mult-s16
    32 bit mult: 1272300 usec (avg: 12723, min = 12533, max = 18749, stddev = 620.48).
    64 bit mult: 852241 usec (avg: 8522.41, min = 8420, max = 9148, stddev = 109.388).
    100%: Checks: 1, Failures: 0, Errors: 0
    
    on Pentium D, 3.4GHz (32-bit)
    
    Running suite(s): Mult-s16
    32 bit mult: 2228504 usec (avg: 22285, min = 18775, max = 29648, stddev = 3865.59).
    64 bit mult: 5546861 usec (avg: 55468.6, min = 55028, max = 64924, stddev = 978.981).
    100%: Checks: 1, Failures: 0, Errors: 0
    
    on TI DM3730, Cortex-A8, 800MHz (32-bit)
    
    Running suite(s): Mult-s16
    32 bit mult: 23708900 usec (avg: 237089, min = 191864, max = 557312, stddev = 77503.6).
    64 bit mult: 22190039 usec (avg: 221900, min = 177978, max = 480469, stddev = 68520.5).
    100%: Checks: 1, Failures: 0, Errors: 0
    
    there is a test program called mult-s16-test which checks that the functions compute the
    same results, and compares runtime
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index 0db5f59..b2bbeb7 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -131,23 +131,10 @@ static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
 
         for (i = 0; i < nstreams; i++) {
             pa_mix_info *m = streams + i;
-            int32_t v, lo, hi, cv = m->linear[channel].i;
-
-            if (PA_LIKELY(cv > 0)) {
-
-                /* Multiplying the 32bit volume factor with the
-                 * 16bit sample might result in an 48bit value. We
-                 * want to do without 64 bit integers and hence do
-                 * the multiplication independently for the HI and
-                 * LO part of the volume. */
-
-                hi = cv >> 16;
-                lo = cv & 0xFFFF;
+            int32_t cv = m->linear[channel].i;
 
-                v = *((int16_t*) m->ptr);
-                v = ((v * lo) >> 16) + (v * hi);
-                sum += v;
-            }
+            if (PA_LIKELY(cv > 0))
+                sum += pa_mult_s16_volume(*((int16_t*) m->ptr), m->linear[channel].i);
             m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
         }
 
@@ -170,17 +157,10 @@ static void pa_mix_s16re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
 
         for (i = 0; i < nstreams; i++) {
             pa_mix_info *m = streams + i;
-            int32_t v, lo, hi, cv = m->linear[channel].i;
-
-            if (PA_LIKELY(cv > 0)) {
-
-                hi = cv >> 16;
-                lo = cv & 0xFFFF;
+            int32_t cv = m->linear[channel].i;
 
-                v = PA_INT16_SWAP(*((int16_t*) m->ptr));
-                v = ((v * lo) >> 16) + (v * hi);
-                sum += v;
-            }
+            if (PA_LIKELY(cv > 0))
+                sum += pa_mult_s16_volume(PA_INT16_SWAP(*((int16_t*) m->ptr)), cv);
             m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
         }
 
@@ -207,7 +187,6 @@ static void pa_mix_s32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
             int64_t v;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = *((int32_t*) m->ptr);
                 v = (v * cv) >> 16;
                 sum += v;
@@ -238,7 +217,6 @@ static void pa_mix_s32re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
             int64_t v;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = PA_INT32_SWAP(*((int32_t*) m->ptr));
                 v = (v * cv) >> 16;
                 sum += v;
@@ -269,7 +247,6 @@ static void pa_mix_s24ne_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
             int64_t v;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = (int32_t) (PA_READ24NE(m->ptr) << 8);
                 v = (v * cv) >> 16;
                 sum += v;
@@ -300,7 +277,6 @@ static void pa_mix_s24re_c(pa_mix_info streams[], unsigned nstreams, unsigned ch
             int64_t v;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = (int32_t) (PA_READ24RE(m->ptr) << 8);
                 v = (v * cv) >> 16;
                 sum += v;
@@ -331,7 +307,6 @@ static void pa_mix_s24_32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned
             int64_t v;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = (int32_t) (*((uint32_t*)m->ptr) << 8);
                 v = (v * cv) >> 16;
                 sum += v;
@@ -362,7 +337,6 @@ static void pa_mix_s24_32re_c(pa_mix_info streams[], unsigned nstreams, unsigned
             int64_t v;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
                 v = (v * cv) >> 16;
                 sum += v;
@@ -392,7 +366,6 @@ static void pa_mix_u8_c(pa_mix_info streams[], unsigned nstreams, unsigned chann
             int32_t v, cv = m->linear[channel].i;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
                 v = (v * cv) >> 16;
                 sum += v;
@@ -419,17 +392,10 @@ static void pa_mix_ulaw_c(pa_mix_info streams[], unsigned nstreams, unsigned cha
 
         for (i = 0; i < nstreams; i++) {
             pa_mix_info *m = streams + i;
-            int32_t v, hi, lo, cv = m->linear[channel].i;
-
-            if (PA_LIKELY(cv > 0)) {
-
-                hi = cv >> 16;
-                lo = cv & 0xFFFF;
+            int32_t cv = m->linear[channel].i;
 
-                v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr));
-                v = ((v * lo) >> 16) + (v * hi);
-                sum += v;
-            }
+            if (PA_LIKELY(cv > 0))
+                sum += pa_mult_s16_volume(st_ulaw2linear16(*((uint8_t*) m->ptr)), cv);
             m->ptr = (uint8_t*) m->ptr + 1;
         }
 
@@ -452,17 +418,10 @@ static void pa_mix_alaw_c(pa_mix_info streams[], unsigned nstreams, unsigned cha
 
         for (i = 0; i < nstreams; i++) {
             pa_mix_info *m = streams + i;
-            int32_t v, hi, lo, cv = m->linear[channel].i;
-
-            if (PA_LIKELY(cv > 0)) {
-
-                hi = cv >> 16;
-                lo = cv & 0xFFFF;
+            int32_t cv = m->linear[channel].i;
 
-                v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr));
-                v = ((v * lo) >> 16) + (v * hi);
-                sum += v;
-            }
+            if (PA_LIKELY(cv > 0))
+                sum += pa_mult_s16_volume(st_alaw2linear16(*((uint8_t*) m->ptr)), cv);
             m->ptr = (uint8_t*) m->ptr + 1;
         }
 
@@ -488,7 +447,6 @@ static void pa_mix_float32ne_c(pa_mix_info streams[], unsigned nstreams, unsigne
             float v, cv = m->linear[channel].f;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = *((float*) m->ptr);
                 v *= cv;
                 sum += v;
@@ -517,7 +475,6 @@ static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigne
             float v, cv = m->linear[channel].f;
 
             if (PA_LIKELY(cv > 0)) {
-
                 v = PA_FLOAT32_SWAP(*(float*) m->ptr);
                 v *= cv;
                 sum += v;
diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h
index d308caa..dda1be5 100644
--- a/src/pulsecore/sample-util.h
+++ b/src/pulsecore/sample-util.h
@@ -23,6 +23,9 @@
   USA.
 ***/
 
+#include <inttypes.h>
+#include <limits.h>
+
 #include <pulse/gccmacro.h>
 #include <pulse/sample.h>
 #include <pulse/volume.h>
@@ -53,6 +56,23 @@ void pa_deinterleave(const void *src, void *dst[], unsigned channels, size_t ss,
 
 void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const void *src, size_t sstr, unsigned n);
 
+static inline int32_t pa_mult_s16_volume(int16_t v, int32_t cv) {
+#if __WORDSIZE == 64 || ((ULONG_MAX) > (UINT_MAX))
+    /* Multiply with 64 bit integers on 64 bit platforms */
+    return (v * (int64_t) cv) >> 16;
+#else
+    /* Multiplying the 32 bit volume factor with the
+     * 16 bit sample might result in a 48 bit value. We
+     * want to do without 64 bit integers and hence do
+     * the multiplication independently for the HI and
+     * LO part of the volume. */
+
+    int32_t hi = cv >> 16;
+    int32_t lo = cv & 0xFFFF;
+    return ((v * lo) >> 16) + (v * hi);
+#endif
+}
+
 pa_usec_t pa_bytes_to_usec_round_up(uint64_t length, const pa_sample_spec *spec);
 size_t pa_usec_to_bytes_round_up(pa_usec_t t, const pa_sample_spec *spec);
 
diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c
index 13ac667..43b953c 100644
--- a/src/pulsecore/svolume_c.c
+++ b/src/pulsecore/svolume_c.c
@@ -24,7 +24,6 @@
 #include <config.h>
 #endif
 
-
 #include <pulsecore/macro.h>
 #include <pulsecore/g711.h>
 #include <pulsecore/endianmacros.h>
@@ -35,13 +34,8 @@ static void pa_volume_u8_c(uint8_t *samples, const int32_t *volumes, unsigned ch
     unsigned channel;
 
     for (channel = 0; length; length--) {
-        int32_t t, hi, lo;
-
-        hi = volumes[channel] >> 16;
-        lo = volumes[channel] & 0xFFFF;
+        int32_t t = pa_mult_s16_volume(*samples - 0x80, volumes[channel]);
 
-        t = (int32_t) *samples - 0x80;
-        t = ((t * lo) >> 16) + (t * hi);
         t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
         *samples++ = (uint8_t) (t + 0x80);
 
@@ -54,13 +48,8 @@ static void pa_volume_alaw_c(uint8_t *samples, const int32_t *volumes, unsigned
     unsigned channel;
 
     for (channel = 0; length; length--) {
-        int32_t t, hi, lo;
+        int32_t t = pa_mult_s16_volume(st_alaw2linear16(*samples), volumes[channel]);
 
-        hi = volumes[channel] >> 16;
-        lo = volumes[channel] & 0xFFFF;
-
-        t = (int32_t) st_alaw2linear16(*samples);
-        t = ((t * lo) >> 16) + (t * hi);
         t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
         *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
 
@@ -73,13 +62,8 @@ static void pa_volume_ulaw_c(uint8_t *samples, const int32_t *volumes, unsigned
     unsigned channel;
 
     for (channel = 0; length; length--) {
-        int32_t t, hi, lo;
-
-        hi = volumes[channel] >> 16;
-        lo = volumes[channel] & 0xFFFF;
+        int32_t t = pa_mult_s16_volume(st_ulaw2linear16(*samples), volumes[channel]);
 
-        t = (int32_t) st_ulaw2linear16(*samples);
-        t = ((t * lo) >> 16) + (t * hi);
         t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
         *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
 
@@ -94,19 +78,8 @@ static void pa_volume_s16ne_c(int16_t *samples, const int32_t *volumes, unsigned
     length /= sizeof(int16_t);
 
     for (channel = 0; length; length--) {
-        int32_t t, hi, lo;
+        int32_t t = pa_mult_s16_volume(*samples, volumes[channel]);
 
-        /* Multiplying the 32bit volume factor with the 16bit
-         * sample might result in an 48bit value. We want to
-         * do without 64 bit integers and hence do the
-         * multiplication independently for the HI and LO part
-         * of the volume. */
-
-        hi = volumes[channel] >> 16;
-        lo = volumes[channel] & 0xFFFF;
-
-        t = (int32_t)(*samples);
-        t = ((t * lo) >> 16) + (t * hi);
         t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
         *samples++ = (int16_t) t;
 
@@ -121,13 +94,8 @@ static void pa_volume_s16re_c(int16_t *samples, const int32_t *volumes, unsigned
     length /= sizeof(int16_t);
 
     for (channel = 0; length; length--) {
-        int32_t t, hi, lo;
-
-        hi = volumes[channel] >> 16;
-        lo = volumes[channel] & 0xFFFF;
+        int32_t t = pa_mult_s16_volume(PA_INT16_SWAP(*samples), volumes[channel]);
 
-        t = (int32_t) PA_INT16_SWAP(*samples);
-        t = ((t * lo) >> 16) + (t * hi);
         t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
         *samples++ = PA_INT16_SWAP((int16_t) t);
 

commit b123cfa7c961080f32a1ff46e22d1c567bb52b0d
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:04 2013 +0100

    mix: Combine loops over streams in pa_mix()
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index 73a0962..0db5f59 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -561,7 +561,6 @@ size_t pa_mix(
 
     pa_cvolume full_volume;
     unsigned k;
-    unsigned z;
     void *end;
 
     pa_assert(streams);
@@ -577,12 +576,11 @@ size_t pa_mix(
         return length;
     }
 
-    for (k = 0; k < nstreams; k++)
+    for (k = 0; k < nstreams; k++) {
         streams[k].ptr = pa_memblock_acquire_chunk(&streams[k].chunk);
-
-    for (z = 0; z < nstreams; z++)
-        if (length > streams[z].chunk.length)
-            length = streams[z].chunk.length;
+        if (length > streams[k].chunk.length)
+            length = streams[k].chunk.length;
+    }
 
     end = (uint8_t*) data + length;
 

commit 9fa000bbfca89e5d094ac5dc157bc7c0ff5f6a51
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:03 2013 +0100

    mix: Export function to get/set mixing implementation for a sample format
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index 2a132e1..73a0962 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -534,8 +534,6 @@ static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigne
     }
 }
 
-typedef void (*pa_do_mix_func_t) (pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end);
-
 static pa_do_mix_func_t do_mix_table[] = {
     [PA_SAMPLE_U8]        = (pa_do_mix_func_t) pa_mix_u8_c,
     [PA_SAMPLE_ALAW]      = (pa_do_mix_func_t) pa_mix_alaw_c,
@@ -597,6 +595,20 @@ size_t pa_mix(
     return length;
 }
 
+pa_do_mix_func_t pa_get_mix_func(pa_sample_format_t f) {
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+
+    return do_mix_table[f];
+}
+
+void pa_set_mix_func(pa_sample_format_t f, pa_do_mix_func_t func) {
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+
+    do_mix_table[f] = func;
+}
+
 typedef union {
   float f;
   uint32_t i;
diff --git a/src/pulsecore/mix.h b/src/pulsecore/mix.h
index 34c998b..27198fc 100644
--- a/src/pulsecore/mix.h
+++ b/src/pulsecore/mix.h
@@ -51,6 +51,11 @@ size_t pa_mix(
     const pa_cvolume *volume,
     pa_bool_t mute);
 
+typedef void (*pa_do_mix_func_t) (pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end);
+
+pa_do_mix_func_t pa_get_mix_func(pa_sample_format_t f);
+void pa_set_mix_func(pa_sample_format_t f, pa_do_mix_func_t func);
+
 void pa_volume_memchunk(
     pa_memchunk*c,
     const pa_sample_spec *spec,

commit fe455ae0137c44416b5f46a87abf33c05f513375
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:02 2013 +0100

    mix: Split pa_mix() code using function table
    
    have individual function for mixing stream with different sample format instead
    of huge case block in pa_mix()
    
    shorter functions, prepare for optimized code path
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index af854cc..2a132e1 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -122,490 +122,476 @@ static const pa_calc_stream_volumes_func_t calc_stream_volumes_table[] = {
   [PA_SAMPLE_S24_32BE]  = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes
 };
 
-size_t pa_mix(
-        pa_mix_info streams[],
-        unsigned nstreams,
-        void *data,
-        size_t length,
-        const pa_sample_spec *spec,
-        const pa_cvolume *volume,
-        pa_bool_t mute) {
-
-    pa_cvolume full_volume;
-    unsigned k;
-    unsigned z;
-    void *end;
-
-    pa_assert(streams);
-    pa_assert(data);
-    pa_assert(length);
-    pa_assert(spec);
-
-    if (!volume)
-        volume = pa_cvolume_reset(&full_volume, spec->channels);
+static void pa_mix_s16ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-    if (mute || pa_cvolume_is_muted(volume) || nstreams <= 0) {
-        pa_silence_memory(data, length, spec);
-        return length;
-    }
-
-    for (k = 0; k < nstreams; k++)
-        streams[k].ptr = pa_memblock_acquire_chunk(&streams[k].chunk);
+    while (data < end) {
+        int32_t sum = 0;
+        unsigned i;
 
-    for (z = 0; z < nstreams; z++)
-        if (length > streams[z].chunk.length)
-            length = streams[z].chunk.length;
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t v, lo, hi, cv = m->linear[channel].i;
 
-    end = (uint8_t*) data + length;
+            if (PA_LIKELY(cv > 0)) {
 
-    calc_stream_volumes_table[spec->format](streams, nstreams, volume, spec);
+                /* Multiplying the 32bit volume factor with the
+                 * 16bit sample might result in an 48bit value. We
+                 * want to do without 64 bit integers and hence do
+                 * the multiplication independently for the HI and
+                 * LO part of the volume. */
 
-    switch (spec->format) {
+                hi = cv >> 16;
+                lo = cv & 0xFFFF;
 
-        case PA_SAMPLE_S16NE:{
-            unsigned channel = 0;
+                v = *((int16_t*) m->ptr);
+                v = ((v * lo) >> 16) + (v * hi);
+                sum += v;
+            }
+            m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
+        }
 
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *((int16_t*) data) = (int16_t) sum;
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, lo, hi, cv = m->linear[channel].i;
+        data = (uint8_t*) data + sizeof(int16_t);
 
-                    if (PA_LIKELY(cv > 0)) {
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                        /* Multiplying the 32bit volume factor with the
-                         * 16bit sample might result in an 48bit value. We
-                         * want to do without 64 bit integers and hence do
-                         * the multiplication independently for the HI and
-                         * LO part of the volume. */
+static void pa_mix_s16re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
+    while (data < end) {
+        int32_t sum = 0;
+        unsigned i;
 
-                        v = *((int16_t*) m->ptr);
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
-                }
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t v, lo, hi, cv = m->linear[channel].i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((int16_t*) data) = (int16_t) sum;
+            if (PA_LIKELY(cv > 0)) {
 
-                data = (uint8_t*) data + sizeof(int16_t);
+                hi = cv >> 16;
+                lo = cv & 0xFFFF;
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = PA_INT16_SWAP(*((int16_t*) m->ptr));
+                v = ((v * lo) >> 16) + (v * hi);
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
         }
 
-        case PA_SAMPLE_S16RE:{
-            unsigned channel = 0;
-
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *((int16_t*) data) = PA_INT16_SWAP((int16_t) sum);
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, lo, hi, cv = m->linear[channel].i;
+        data = (uint8_t*) data + sizeof(int16_t);
 
-                    if (PA_LIKELY(cv > 0)) {
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
+static void pa_mix_s32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = PA_INT16_SWAP(*((int16_t*) m->ptr));
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
-                }
+    while (data < end) {
+        int64_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((int16_t*) data) = PA_INT16_SWAP((int16_t) sum);
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv = m->linear[channel].i;
+            int64_t v;
 
-                data = (uint8_t*) data + sizeof(int16_t);
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = *((int32_t*) m->ptr);
+                v = (v * cv) >> 16;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
         }
 
-        case PA_SAMPLE_S32NE:{
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+        *((int32_t*) data) = (int32_t) sum;
 
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + sizeof(int32_t);
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_s32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = *((int32_t*) m->ptr);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
-                }
+    while (data < end) {
+        int64_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((int32_t*) data) = (int32_t) sum;
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv = m->linear[channel].i;
+            int64_t v;
 
-                data = (uint8_t*) data + sizeof(int32_t);
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = PA_INT32_SWAP(*((int32_t*) m->ptr));
+                v = (v * cv) >> 16;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
         }
 
-        case PA_SAMPLE_S32RE:{
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+        *((int32_t*) data) = PA_INT32_SWAP((int32_t) sum);
 
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + sizeof(int32_t);
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_s24ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = PA_INT32_SWAP(*((int32_t*) m->ptr));
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
-                }
+    while (data < end) {
+        int64_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((int32_t*) data) = PA_INT32_SWAP((int32_t) sum);
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv = m->linear[channel].i;
+            int64_t v;
 
-                data = (uint8_t*) data + sizeof(int32_t);
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = (int32_t) (PA_READ24NE(m->ptr) << 8);
+                v = (v * cv) >> 16;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + 3;
         }
 
-        case PA_SAMPLE_S24NE: {
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+        PA_WRITE24NE(data, ((uint32_t) sum) >> 8);
 
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + 3;
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_s24re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = (int32_t) (PA_READ24NE(m->ptr) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 3;
-                }
+    while (data < end) {
+        int64_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                PA_WRITE24NE(data, ((uint32_t) sum) >> 8);
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv = m->linear[channel].i;
+            int64_t v;
 
-                data = (uint8_t*) data + 3;
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = (int32_t) (PA_READ24RE(m->ptr) << 8);
+                v = (v * cv) >> 16;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + 3;
         }
 
-        case PA_SAMPLE_S24RE: {
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+        PA_WRITE24RE(data, ((uint32_t) sum) >> 8);
 
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + 3;
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_s24_32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = (int32_t) (PA_READ24RE(m->ptr) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 3;
-                }
+    while (data < end) {
+        int64_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                PA_WRITE24RE(data, ((uint32_t) sum) >> 8);
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv = m->linear[channel].i;
+            int64_t v;
 
-                data = (uint8_t*) data + 3;
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = (int32_t) (*((uint32_t*)m->ptr) << 8);
+                v = (v * cv) >> 16;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
         }
 
-        case PA_SAMPLE_S24_32NE: {
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+        *((uint32_t*) data) = ((uint32_t) (int32_t) sum) >> 8;
 
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + sizeof(uint32_t);
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_s24_32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = (int32_t) (*((uint32_t*)m->ptr) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
-                }
+    while (data < end) {
+        int64_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((uint32_t*) data) = ((uint32_t) (int32_t) sum) >> 8;
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t cv = m->linear[channel].i;
+            int64_t v;
 
-                data = (uint8_t*) data + sizeof(uint32_t);
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
+                v = (v * cv) >> 16;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + 3;
         }
 
-        case PA_SAMPLE_S24_32RE: {
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+        *((uint32_t*) data) = PA_INT32_SWAP(((uint32_t) (int32_t) sum) >> 8);
 
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + sizeof(uint32_t);
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_u8_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 3;
-                }
+    while (data < end) {
+        int32_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((uint32_t*) data) = PA_INT32_SWAP(((uint32_t) (int32_t) sum) >> 8);
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t v, cv = m->linear[channel].i;
 
-                data = (uint8_t*) data + sizeof(uint32_t);
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
+                v = (v * cv) >> 16;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + 1;
         }
 
-        case PA_SAMPLE_U8: {
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x80, 0x7F);
+        *((uint8_t*) data) = (uint8_t) (sum + 0x80);
 
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + 1;
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, cv = m->linear[channel].i;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_ulaw_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 1;
-                }
+    while (data < end) {
+        int32_t sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80, 0x7F);
-                *((uint8_t*) data) = (uint8_t) (sum + 0x80);
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t v, hi, lo, cv = m->linear[channel].i;
 
-                data = (uint8_t*) data + 1;
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
+                hi = cv >> 16;
+                lo = cv & 0xFFFF;
 
-            break;
+                v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr));
+                v = ((v * lo) >> 16) + (v * hi);
+                sum += v;
+            }
+            m->ptr = (uint8_t*) m->ptr + 1;
         }
 
-        case PA_SAMPLE_ULAW: {
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *((uint8_t*) data) = (uint8_t) st_14linear2ulaw((int16_t) sum >> 2);
 
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + 1;
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, hi, lo, cv = m->linear[channel].i;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_alaw_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
+    while (data < end) {
+        int32_t sum = 0;
+        unsigned i;
 
-                        v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr));
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 1;
-                }
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            int32_t v, hi, lo, cv = m->linear[channel].i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((uint8_t*) data) = (uint8_t) st_14linear2ulaw((int16_t) sum >> 2);
+            if (PA_LIKELY(cv > 0)) {
 
-                data = (uint8_t*) data + 1;
+                hi = cv >> 16;
+                lo = cv & 0xFFFF;
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr));
+                v = ((v * lo) >> 16) + (v * hi);
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + 1;
         }
 
-        case PA_SAMPLE_ALAW: {
-            unsigned channel = 0;
+        sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+        *((uint8_t*) data) = (uint8_t) st_13linear2alaw((int16_t) sum >> 3);
 
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + 1;
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, hi, lo, cv = m->linear[channel].i;
-
-                    if (PA_LIKELY(cv > 0)) {
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
+static void pa_mix_float32ne_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr));
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 1;
-                }
+    while (data < end) {
+        float sum = 0;
+        unsigned i;
 
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((uint8_t*) data) = (uint8_t) st_13linear2alaw((int16_t) sum >> 3);
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            float v, cv = m->linear[channel].f;
 
-                data = (uint8_t*) data + 1;
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = *((float*) m->ptr);
+                v *= cv;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + sizeof(float);
         }
 
-        case PA_SAMPLE_FLOAT32NE: {
-            unsigned channel = 0;
+        *((float*) data) = sum;
 
-            while (data < end) {
-                float sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + sizeof(float);
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    float v, cv = m->linear[channel].f;
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                    if (PA_LIKELY(cv > 0)) {
+static void pa_mix_float32re_c(pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end) {
+    unsigned channel = 0;
 
-                        v = *((float*) m->ptr);
-                        v *= cv;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(float);
-                }
+    while (data < end) {
+        float sum = 0;
+        unsigned i;
 
-                *((float*) data) = sum;
+        for (i = 0; i < nstreams; i++) {
+            pa_mix_info *m = streams + i;
+            float v, cv = m->linear[channel].f;
 
-                data = (uint8_t*) data + sizeof(float);
+            if (PA_LIKELY(cv > 0)) {
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
+                v = PA_FLOAT32_SWAP(*(float*) m->ptr);
+                v *= cv;
+                sum += v;
             }
-
-            break;
+            m->ptr = (uint8_t*) m->ptr + sizeof(float);
         }
 
-        case PA_SAMPLE_FLOAT32RE: {
-            unsigned channel = 0;
+        *((float*) data) = PA_FLOAT32_SWAP(sum);
 
-            while (data < end) {
-                float sum = 0;
-                unsigned i;
+        data = (uint8_t*) data + sizeof(float);
 
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    float v, cv = m->linear[channel].f;
-
-                    if (PA_LIKELY(cv > 0)) {
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0;
+    }
+}
 
-                        v = PA_FLOAT32_SWAP(*(float*) m->ptr);
-                        v *= cv;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(float);
-                }
+typedef void (*pa_do_mix_func_t) (pa_mix_info streams[], unsigned nstreams, unsigned channels, void *data, void *end);
+
+static pa_do_mix_func_t do_mix_table[] = {
+    [PA_SAMPLE_U8]        = (pa_do_mix_func_t) pa_mix_u8_c,
+    [PA_SAMPLE_ALAW]      = (pa_do_mix_func_t) pa_mix_alaw_c,
+    [PA_SAMPLE_ULAW]      = (pa_do_mix_func_t) pa_mix_ulaw_c,
+    [PA_SAMPLE_S16NE]     = (pa_do_mix_func_t) pa_mix_s16ne_c,
+    [PA_SAMPLE_S16RE]     = (pa_do_mix_func_t) pa_mix_s16re_c,
+    [PA_SAMPLE_FLOAT32NE] = (pa_do_mix_func_t) pa_mix_float32ne_c,
+    [PA_SAMPLE_FLOAT32RE] = (pa_do_mix_func_t) pa_mix_float32re_c,
+    [PA_SAMPLE_S32NE]     = (pa_do_mix_func_t) pa_mix_s32ne_c,
+    [PA_SAMPLE_S32RE]     = (pa_do_mix_func_t) pa_mix_s32re_c,
+    [PA_SAMPLE_S24NE]     = (pa_do_mix_func_t) pa_mix_s24ne_c,
+    [PA_SAMPLE_S24RE]     = (pa_do_mix_func_t) pa_mix_s24re_c,
+    [PA_SAMPLE_S24_32NE]  = (pa_do_mix_func_t) pa_mix_s24_32ne_c,
+    [PA_SAMPLE_S24_32RE]  = (pa_do_mix_func_t) pa_mix_s24_32re_c
+};
 
-                *((float*) data) = PA_FLOAT32_SWAP(sum);
+size_t pa_mix(
+        pa_mix_info streams[],
+        unsigned nstreams,
+        void *data,
+        size_t length,
+        const pa_sample_spec *spec,
+        const pa_cvolume *volume,
+        pa_bool_t mute) {
 
-                data = (uint8_t*) data + sizeof(float);
+    pa_cvolume full_volume;
+    unsigned k;
+    unsigned z;
+    void *end;
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
+    pa_assert(streams);
+    pa_assert(data);
+    pa_assert(length);
+    pa_assert(spec);
 
-            break;
-        }
+    if (!volume)
+        volume = pa_cvolume_reset(&full_volume, spec->channels);
 
-        default:
-            pa_log_error("Unable to mix audio data of format %s.", pa_sample_format_to_string(spec->format));
-            pa_assert_not_reached();
+    if (mute || pa_cvolume_is_muted(volume) || nstreams <= 0) {
+        pa_silence_memory(data, length, spec);
+        return length;
     }
 
     for (k = 0; k < nstreams; k++)
+        streams[k].ptr = pa_memblock_acquire_chunk(&streams[k].chunk);
+
+    for (z = 0; z < nstreams; z++)
+        if (length > streams[z].chunk.length)
+            length = streams[z].chunk.length;
+
+    end = (uint8_t*) data + length;
+
+    calc_stream_volumes_table[spec->format](streams, nstreams, volume, spec);
+    do_mix_table[spec->format](streams, nstreams, spec->channels, data, end);
+
+    for (k = 0; k < nstreams; k++)
         pa_memblock_release(streams[k].chunk.memblock);
 
     return length;

commit c90868f2e09a5706730a8e3368c114580a38d0a5
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:01 2013 +0100

    mix: Use table for calc_stream_volumes()
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
index 93978eb..af854cc 100644
--- a/src/pulsecore/mix.c
+++ b/src/pulsecore/mix.c
@@ -104,6 +104,24 @@ static void calc_linear_float_stream_volumes(pa_mix_info streams[], unsigned nst
     }
 }
 
+typedef void (*pa_calc_stream_volumes_func_t) (pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec);
+
+static const pa_calc_stream_volumes_func_t calc_stream_volumes_table[] = {
+  [PA_SAMPLE_U8]        = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_ALAW]      = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_ULAW]      = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_S16LE]     = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_S16BE]     = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_FLOAT32LE] = (pa_calc_stream_volumes_func_t) calc_linear_float_stream_volumes,
+  [PA_SAMPLE_FLOAT32BE] = (pa_calc_stream_volumes_func_t) calc_linear_float_stream_volumes,
+  [PA_SAMPLE_S32LE]     = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_S32BE]     = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_S24LE]     = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_S24BE]     = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_S24_32LE]  = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes,
+  [PA_SAMPLE_S24_32BE]  = (pa_calc_stream_volumes_func_t) calc_linear_integer_stream_volumes
+};
+
 size_t pa_mix(
         pa_mix_info streams[],
         unsigned nstreams,
@@ -140,13 +158,13 @@ size_t pa_mix(
 
     end = (uint8_t*) data + length;
 
+    calc_stream_volumes_table[spec->format](streams, nstreams, volume, spec);
+
     switch (spec->format) {
 
         case PA_SAMPLE_S16NE:{
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int32_t sum = 0;
                 unsigned i;
@@ -188,8 +206,6 @@ size_t pa_mix(
         case PA_SAMPLE_S16RE:{
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int32_t sum = 0;
                 unsigned i;
@@ -225,8 +241,6 @@ size_t pa_mix(
         case PA_SAMPLE_S32NE:{
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int64_t sum = 0;
                 unsigned i;
@@ -260,8 +274,6 @@ size_t pa_mix(
         case PA_SAMPLE_S32RE:{
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int64_t sum = 0;
                 unsigned i;
@@ -295,8 +307,6 @@ size_t pa_mix(
         case PA_SAMPLE_S24NE: {
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int64_t sum = 0;
                 unsigned i;
@@ -330,8 +340,6 @@ size_t pa_mix(
         case PA_SAMPLE_S24RE: {
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int64_t sum = 0;
                 unsigned i;
@@ -365,8 +373,6 @@ size_t pa_mix(
         case PA_SAMPLE_S24_32NE: {
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int64_t sum = 0;
                 unsigned i;
@@ -400,8 +406,6 @@ size_t pa_mix(
         case PA_SAMPLE_S24_32RE: {
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int64_t sum = 0;
                 unsigned i;
@@ -435,8 +439,6 @@ size_t pa_mix(
         case PA_SAMPLE_U8: {
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int32_t sum = 0;
                 unsigned i;
@@ -469,8 +471,6 @@ size_t pa_mix(
         case PA_SAMPLE_ULAW: {
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int32_t sum = 0;
                 unsigned i;
@@ -506,8 +506,6 @@ size_t pa_mix(
         case PA_SAMPLE_ALAW: {
             unsigned channel = 0;
 
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 int32_t sum = 0;
                 unsigned i;
@@ -543,8 +541,6 @@ size_t pa_mix(
         case PA_SAMPLE_FLOAT32NE: {
             unsigned channel = 0;
 
-            calc_linear_float_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 float sum = 0;
                 unsigned i;
@@ -576,8 +572,6 @@ size_t pa_mix(
         case PA_SAMPLE_FLOAT32RE: {
             unsigned channel = 0;
 
-            calc_linear_float_stream_volumes(streams, nstreams, volume, spec);
-
             while (data < end) {
                 float sum = 0;
                 unsigned i;

commit 1335914e72f66d6bd933b2d5df31d63f7a048ab5
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:27:00 2013 +0100

    sample-util: Remove duplicate stdio.h #include
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index 26c0a01..bda5a5e 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -24,7 +24,6 @@
 #include <config.h>
 #endif
 
-#include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>

commit 95b64804ab9d21c6807c25eb3f03afd6bfd35cb6
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:26:59 2013 +0100

    core: Move pa_mix() into new file mix.c
    
    The idea is to allow optimized code paths (similar to the volume code) and
    to rework/specialize mixing cases to enable runtime performance improvements.
    
    No functional changes in this patch.
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/Makefile.am b/src/Makefile.am
index 1208a35..22b0409 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -856,6 +856,7 @@ libpulsecore_ at PA_MAJORMINOR@_la_SOURCES = \
 		pulsecore/remap_mmx.c pulsecore/remap_sse.c \
 		pulsecore/resampler.c pulsecore/resampler.h \
 		pulsecore/rtpoll.c pulsecore/rtpoll.h \
+		pulsecore/mix.c pulsecore/mix.h \
 		pulsecore/cpu.h \
 		pulsecore/cpu-arm.c pulsecore/cpu-arm.h \
 		pulsecore/cpu-x86.c pulsecore/cpu-x86.h \
diff --git a/src/modules/module-virtual-source.c b/src/modules/module-virtual-source.c
index 4bdcded..be2de5a 100644
--- a/src/modules/module-virtual-source.c
+++ b/src/modules/module-virtual-source.c
@@ -39,6 +39,7 @@
 #include <pulsecore/rtpoll.h>
 #include <pulsecore/sample-util.h>
 #include <pulsecore/ltdl-helper.h>
+#include <pulsecore/mix.h>
 
 #include "module-virtual-source-symdef.h"
 
diff --git a/src/pulsecore/mix.c b/src/pulsecore/mix.c
new file mode 100644
index 0000000..93978eb
--- /dev/null
+++ b/src/pulsecore/mix.c
@@ -0,0 +1,679 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2006 Pierre Ossman <ossman at cendio.se> for Cendio AB
+  Copyright 2013 Peter Meerwald <pmeerw at pmeerw.net>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include <pulsecore/sample-util.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/endianmacros.h>
+
+#include "mix.h"
+
+#define VOLUME_PADDING 32
+
+static void calc_linear_integer_volume(int32_t linear[], const pa_cvolume *volume) {
+    unsigned channel, nchannels, padding;
+
+    pa_assert(linear);
+    pa_assert(volume);
+
+    nchannels = volume->channels;
+
+    for (channel = 0; channel < nchannels; channel++)
+        linear[channel] = (int32_t) lrint(pa_sw_volume_to_linear(volume->values[channel]) * 0x10000);
+
+    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
+        linear[channel] = linear[padding];
+}
+
+static void calc_linear_float_volume(float linear[], const pa_cvolume *volume) {
+    unsigned channel, nchannels, padding;
+
+    pa_assert(linear);
+    pa_assert(volume);
+
+    nchannels = volume->channels;
+
+    for (channel = 0; channel < nchannels; channel++)
+        linear[channel] = (float) pa_sw_volume_to_linear(volume->values[channel]);
+
+    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
+        linear[channel] = linear[padding];
+}
+
+static void calc_linear_integer_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {
+    unsigned k, channel;
+    float linear[PA_CHANNELS_MAX + VOLUME_PADDING];
+
+    pa_assert(streams);
+    pa_assert(spec);
+    pa_assert(volume);
+
+    calc_linear_float_volume(linear, volume);
+
+    for (k = 0; k < nstreams; k++) {
+
+        for (channel = 0; channel < spec->channels; channel++) {
+            pa_mix_info *m = streams + k;
+            m->linear[channel].i = (int32_t) lrint(pa_sw_volume_to_linear(m->volume.values[channel]) * linear[channel] * 0x10000);
+        }
+    }
+}
+
+static void calc_linear_float_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {
+    unsigned k, channel;
+    float linear[PA_CHANNELS_MAX + VOLUME_PADDING];
+
+    pa_assert(streams);
+    pa_assert(spec);
+    pa_assert(volume);
+
+    calc_linear_float_volume(linear, volume);
+
+    for (k = 0; k < nstreams; k++) {
+
+        for (channel = 0; channel < spec->channels; channel++) {
+            pa_mix_info *m = streams + k;
+            m->linear[channel].f = (float) (pa_sw_volume_to_linear(m->volume.values[channel]) * linear[channel]);
+        }
+    }
+}
+
+size_t pa_mix(
+        pa_mix_info streams[],
+        unsigned nstreams,
+        void *data,
+        size_t length,
+        const pa_sample_spec *spec,
+        const pa_cvolume *volume,
+        pa_bool_t mute) {
+
+    pa_cvolume full_volume;
+    unsigned k;
+    unsigned z;
+    void *end;
+
+    pa_assert(streams);
+    pa_assert(data);
+    pa_assert(length);
+    pa_assert(spec);
+
+    if (!volume)
+        volume = pa_cvolume_reset(&full_volume, spec->channels);
+
+    if (mute || pa_cvolume_is_muted(volume) || nstreams <= 0) {
+        pa_silence_memory(data, length, spec);
+        return length;
+    }
+
+    for (k = 0; k < nstreams; k++)
+        streams[k].ptr = pa_memblock_acquire_chunk(&streams[k].chunk);
+
+    for (z = 0; z < nstreams; z++)
+        if (length > streams[z].chunk.length)
+            length = streams[z].chunk.length;
+
+    end = (uint8_t*) data + length;
+
+    switch (spec->format) {
+
+        case PA_SAMPLE_S16NE:{
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int32_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t v, lo, hi, cv = m->linear[channel].i;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        /* Multiplying the 32bit volume factor with the
+                         * 16bit sample might result in an 48bit value. We
+                         * want to do without 64 bit integers and hence do
+                         * the multiplication independently for the HI and
+                         * LO part of the volume. */
+
+                        hi = cv >> 16;
+                        lo = cv & 0xFFFF;
+
+                        v = *((int16_t*) m->ptr);
+                        v = ((v * lo) >> 16) + (v * hi);
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+                *((int16_t*) data) = (int16_t) sum;
+
+                data = (uint8_t*) data + sizeof(int16_t);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S16RE:{
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int32_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t v, lo, hi, cv = m->linear[channel].i;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        hi = cv >> 16;
+                        lo = cv & 0xFFFF;
+
+                        v = PA_INT16_SWAP(*((int16_t*) m->ptr));
+                        v = ((v * lo) >> 16) + (v * hi);
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+                *((int16_t*) data) = PA_INT16_SWAP((int16_t) sum);
+
+                data = (uint8_t*) data + sizeof(int16_t);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S32NE:{
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int64_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t cv = m->linear[channel].i;
+                    int64_t v;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = *((int32_t*) m->ptr);
+                        v = (v * cv) >> 16;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+                *((int32_t*) data) = (int32_t) sum;
+
+                data = (uint8_t*) data + sizeof(int32_t);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S32RE:{
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int64_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t cv = m->linear[channel].i;
+                    int64_t v;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = PA_INT32_SWAP(*((int32_t*) m->ptr));
+                        v = (v * cv) >> 16;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+                *((int32_t*) data) = PA_INT32_SWAP((int32_t) sum);
+
+                data = (uint8_t*) data + sizeof(int32_t);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S24NE: {
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int64_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t cv = m->linear[channel].i;
+                    int64_t v;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = (int32_t) (PA_READ24NE(m->ptr) << 8);
+                        v = (v * cv) >> 16;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + 3;
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+                PA_WRITE24NE(data, ((uint32_t) sum) >> 8);
+
+                data = (uint8_t*) data + 3;
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S24RE: {
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int64_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t cv = m->linear[channel].i;
+                    int64_t v;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = (int32_t) (PA_READ24RE(m->ptr) << 8);
+                        v = (v * cv) >> 16;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + 3;
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+                PA_WRITE24RE(data, ((uint32_t) sum) >> 8);
+
+                data = (uint8_t*) data + 3;
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S24_32NE: {
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int64_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t cv = m->linear[channel].i;
+                    int64_t v;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = (int32_t) (*((uint32_t*)m->ptr) << 8);
+                        v = (v * cv) >> 16;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+                *((uint32_t*) data) = ((uint32_t) (int32_t) sum) >> 8;
+
+                data = (uint8_t*) data + sizeof(uint32_t);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S24_32RE: {
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int64_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t cv = m->linear[channel].i;
+                    int64_t v;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
+                        v = (v * cv) >> 16;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + 3;
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
+                *((uint32_t*) data) = PA_INT32_SWAP(((uint32_t) (int32_t) sum) >> 8);
+
+                data = (uint8_t*) data + sizeof(uint32_t);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_U8: {
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int32_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t v, cv = m->linear[channel].i;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
+                        v = (v * cv) >> 16;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + 1;
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x80, 0x7F);
+                *((uint8_t*) data) = (uint8_t) (sum + 0x80);
+
+                data = (uint8_t*) data + 1;
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_ULAW: {
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int32_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t v, hi, lo, cv = m->linear[channel].i;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        hi = cv >> 16;
+                        lo = cv & 0xFFFF;
+
+                        v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr));
+                        v = ((v * lo) >> 16) + (v * hi);
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + 1;
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+                *((uint8_t*) data) = (uint8_t) st_14linear2ulaw((int16_t) sum >> 2);
+
+                data = (uint8_t*) data + 1;
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_ALAW: {
+            unsigned channel = 0;
+
+            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                int32_t sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    int32_t v, hi, lo, cv = m->linear[channel].i;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        hi = cv >> 16;
+                        lo = cv & 0xFFFF;
+
+                        v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr));
+                        v = ((v * lo) >> 16) + (v * hi);
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + 1;
+                }
+
+                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
+                *((uint8_t*) data) = (uint8_t) st_13linear2alaw((int16_t) sum >> 3);
+
+                data = (uint8_t*) data + 1;
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_FLOAT32NE: {
+            unsigned channel = 0;
+
+            calc_linear_float_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                float sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    float v, cv = m->linear[channel].f;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = *((float*) m->ptr);
+                        v *= cv;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + sizeof(float);
+                }
+
+                *((float*) data) = sum;
+
+                data = (uint8_t*) data + sizeof(float);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_FLOAT32RE: {
+            unsigned channel = 0;
+
+            calc_linear_float_stream_volumes(streams, nstreams, volume, spec);
+
+            while (data < end) {
+                float sum = 0;
+                unsigned i;
+
+                for (i = 0; i < nstreams; i++) {
+                    pa_mix_info *m = streams + i;
+                    float v, cv = m->linear[channel].f;
+
+                    if (PA_LIKELY(cv > 0)) {
+
+                        v = PA_FLOAT32_SWAP(*(float*) m->ptr);
+                        v *= cv;
+                        sum += v;
+                    }
+                    m->ptr = (uint8_t*) m->ptr + sizeof(float);
+                }
+
+                *((float*) data) = PA_FLOAT32_SWAP(sum);
+
+                data = (uint8_t*) data + sizeof(float);
+
+                if (PA_UNLIKELY(++channel >= spec->channels))
+                    channel = 0;
+            }
+
+            break;
+        }
+
+        default:
+            pa_log_error("Unable to mix audio data of format %s.", pa_sample_format_to_string(spec->format));
+            pa_assert_not_reached();
+    }
+
+    for (k = 0; k < nstreams; k++)
+        pa_memblock_release(streams[k].chunk.memblock);
+
+    return length;
+}
+
+typedef union {
+  float f;
+  uint32_t i;
+} volume_val;
+
+typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume);
+
+static const pa_calc_volume_func_t calc_volume_table[] = {
+  [PA_SAMPLE_U8]        = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_ALAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_ULAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S16LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S16BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_FLOAT32LE] = (pa_calc_volume_func_t) calc_linear_float_volume,
+  [PA_SAMPLE_FLOAT32BE] = (pa_calc_volume_func_t) calc_linear_float_volume,
+  [PA_SAMPLE_S32LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S32BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24_32LE]  = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24_32BE]  = (pa_calc_volume_func_t) calc_linear_integer_volume
+};
+
+void pa_volume_memchunk(
+        pa_memchunk*c,
+        const pa_sample_spec *spec,
+        const pa_cvolume *volume) {
+
+    void *ptr;
+    volume_val linear[PA_CHANNELS_MAX + VOLUME_PADDING];
+    pa_do_volume_func_t do_volume;
+
+    pa_assert(c);
+    pa_assert(spec);
+    pa_assert(pa_sample_spec_valid(spec));
+    pa_assert(pa_frame_aligned(c->length, spec));
+    pa_assert(volume);
+
+    if (pa_memblock_is_silence(c->memblock))
+        return;
+
+    if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_NORM))
+        return;
+
+    if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_MUTED)) {
+        pa_silence_memchunk(c, spec);
+        return;
+    }
+
+    do_volume = pa_get_volume_func(spec->format);
+    pa_assert(do_volume);
+
+    calc_volume_table[spec->format] ((void *)linear, volume);
+
+    ptr = pa_memblock_acquire_chunk(c);
+
+    do_volume(ptr, (void *)linear, spec->channels, c->length);
+
+    pa_memblock_release(c->memblock);
+}
diff --git a/src/pulsecore/mix.h b/src/pulsecore/mix.h
new file mode 100644
index 0000000..34c998b
--- /dev/null
+++ b/src/pulsecore/mix.h
@@ -0,0 +1,59 @@
+#ifndef foomixhfoo
+#define foomixhfoo
+
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2006 Pierre Ossman <ossman at cendio.se> for Cendio AB
+  Copyright 2013 Peter Meerwald <pmeerw at pmeerw.net>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <pulse/sample.h>
+#include <pulse/volume.h>
+#include <pulsecore/memchunk.h>
+
+typedef struct pa_mix_info {
+    pa_memchunk chunk;
+    pa_cvolume volume;
+    void *userdata;
+
+    /* The following fields are used internally by pa_mix(), should
+     * not be initialised by the caller of pa_mix(). */
+    void *ptr;
+    union {
+        int32_t i;
+        float f;
+    } linear[PA_CHANNELS_MAX];
+} pa_mix_info;
+
+size_t pa_mix(
+    pa_mix_info channels[],
+    unsigned nchannels,
+    void *data,
+    size_t length,
+    const pa_sample_spec *spec,
+    const pa_cvolume *volume,
+    pa_bool_t mute);
+
+void pa_volume_memchunk(
+    pa_memchunk*c,
+    const pa_sample_spec *spec,
+    const pa_cvolume *volume);
+
+#endif
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index d44e140..26c0a01 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -104,650 +104,6 @@ void* pa_silence_memory(void *p, size_t length, const pa_sample_spec *spec) {
     return p;
 }
 
-#define VOLUME_PADDING 32
-
-static void calc_linear_integer_volume(int32_t linear[], const pa_cvolume *volume) {
-    unsigned channel, nchannels, padding;
-
-    pa_assert(linear);
-    pa_assert(volume);
-
-    nchannels = volume->channels;
-
-    for (channel = 0; channel < nchannels; channel++)
-        linear[channel] = (int32_t) lrint(pa_sw_volume_to_linear(volume->values[channel]) * 0x10000);
-
-    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
-        linear[channel] = linear[padding];
-}
-
-static void calc_linear_float_volume(float linear[], const pa_cvolume *volume) {
-    unsigned channel, nchannels, padding;
-
-    pa_assert(linear);
-    pa_assert(volume);
-
-    nchannels = volume->channels;
-
-    for (channel = 0; channel < nchannels; channel++)
-        linear[channel] = (float) pa_sw_volume_to_linear(volume->values[channel]);
-
-    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
-        linear[channel] = linear[padding];
-}
-
-static void calc_linear_integer_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {
-    unsigned k, channel;
-    float linear[PA_CHANNELS_MAX + VOLUME_PADDING];
-
-    pa_assert(streams);
-    pa_assert(spec);
-    pa_assert(volume);
-
-    calc_linear_float_volume(linear, volume);
-
-    for (k = 0; k < nstreams; k++) {
-
-        for (channel = 0; channel < spec->channels; channel++) {
-            pa_mix_info *m = streams + k;
-            m->linear[channel].i = (int32_t) lrint(pa_sw_volume_to_linear(m->volume.values[channel]) * linear[channel] * 0x10000);
-        }
-    }
-}
-
-static void calc_linear_float_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {
-    unsigned k, channel;
-    float linear[PA_CHANNELS_MAX + VOLUME_PADDING];
-
-    pa_assert(streams);
-    pa_assert(spec);
-    pa_assert(volume);
-
-    calc_linear_float_volume(linear, volume);
-
-    for (k = 0; k < nstreams; k++) {
-
-        for (channel = 0; channel < spec->channels; channel++) {
-            pa_mix_info *m = streams + k;
-            m->linear[channel].f = (float) (pa_sw_volume_to_linear(m->volume.values[channel]) * linear[channel]);
-        }
-    }
-}
-
-size_t pa_mix(
-        pa_mix_info streams[],
-        unsigned nstreams,
-        void *data,
-        size_t length,
-        const pa_sample_spec *spec,
-        const pa_cvolume *volume,
-        pa_bool_t mute) {
-
-    pa_cvolume full_volume;
-    unsigned k;
-    unsigned z;
-    void *end;
-
-    pa_assert(streams);
-    pa_assert(data);
-    pa_assert(length);
-    pa_assert(spec);
-
-    if (!volume)
-        volume = pa_cvolume_reset(&full_volume, spec->channels);
-
-    if (mute || pa_cvolume_is_muted(volume) || nstreams <= 0) {
-        pa_silence_memory(data, length, spec);
-        return length;
-    }
-
-    for (k = 0; k < nstreams; k++)
-        streams[k].ptr = pa_memblock_acquire_chunk(&streams[k].chunk);
-
-    for (z = 0; z < nstreams; z++)
-        if (length > streams[z].chunk.length)
-            length = streams[z].chunk.length;
-
-    end = (uint8_t*) data + length;
-
-    switch (spec->format) {
-
-        case PA_SAMPLE_S16NE:{
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, lo, hi, cv = m->linear[channel].i;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        /* Multiplying the 32bit volume factor with the
-                         * 16bit sample might result in an 48bit value. We
-                         * want to do without 64 bit integers and hence do
-                         * the multiplication independently for the HI and
-                         * LO part of the volume. */
-
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
-
-                        v = *((int16_t*) m->ptr);
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((int16_t*) data) = (int16_t) sum;
-
-                data = (uint8_t*) data + sizeof(int16_t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S16RE:{
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, lo, hi, cv = m->linear[channel].i;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
-
-                        v = PA_INT16_SWAP(*((int16_t*) m->ptr));
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((int16_t*) data) = PA_INT16_SWAP((int16_t) sum);
-
-                data = (uint8_t*) data + sizeof(int16_t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S32NE:{
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = *((int32_t*) m->ptr);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((int32_t*) data) = (int32_t) sum;
-
-                data = (uint8_t*) data + sizeof(int32_t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S32RE:{
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = PA_INT32_SWAP(*((int32_t*) m->ptr));
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((int32_t*) data) = PA_INT32_SWAP((int32_t) sum);
-
-                data = (uint8_t*) data + sizeof(int32_t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S24NE: {
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = (int32_t) (PA_READ24NE(m->ptr) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 3;
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                PA_WRITE24NE(data, ((uint32_t) sum) >> 8);
-
-                data = (uint8_t*) data + 3;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S24RE: {
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = (int32_t) (PA_READ24RE(m->ptr) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 3;
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                PA_WRITE24RE(data, ((uint32_t) sum) >> 8);
-
-                data = (uint8_t*) data + 3;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S24_32NE: {
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = (int32_t) (*((uint32_t*)m->ptr) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((uint32_t*) data) = ((uint32_t) (int32_t) sum) >> 8;
-
-                data = (uint8_t*) data + sizeof(uint32_t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S24_32RE: {
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int64_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t cv = m->linear[channel].i;
-                    int64_t v;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 3;
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80000000LL, 0x7FFFFFFFLL);
-                *((uint32_t*) data) = PA_INT32_SWAP(((uint32_t) (int32_t) sum) >> 8);
-
-                data = (uint8_t*) data + sizeof(uint32_t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_U8: {
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, cv = m->linear[channel].i;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
-                        v = (v * cv) >> 16;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 1;
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x80, 0x7F);
-                *((uint8_t*) data) = (uint8_t) (sum + 0x80);
-
-                data = (uint8_t*) data + 1;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_ULAW: {
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, hi, lo, cv = m->linear[channel].i;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
-
-                        v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr));
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 1;
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((uint8_t*) data) = (uint8_t) st_14linear2ulaw((int16_t) sum >> 2);
-
-                data = (uint8_t*) data + 1;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_ALAW: {
-            unsigned channel = 0;
-
-            calc_linear_integer_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                int32_t sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    int32_t v, hi, lo, cv = m->linear[channel].i;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        hi = cv >> 16;
-                        lo = cv & 0xFFFF;
-
-                        v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr));
-                        v = ((v * lo) >> 16) + (v * hi);
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + 1;
-                }
-
-                sum = PA_CLAMP_UNLIKELY(sum, -0x8000, 0x7FFF);
-                *((uint8_t*) data) = (uint8_t) st_13linear2alaw((int16_t) sum >> 3);
-
-                data = (uint8_t*) data + 1;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_FLOAT32NE: {
-            unsigned channel = 0;
-
-            calc_linear_float_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                float sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    float v, cv = m->linear[channel].f;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = *((float*) m->ptr);
-                        v *= cv;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(float);
-                }
-
-                *((float*) data) = sum;
-
-                data = (uint8_t*) data + sizeof(float);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_FLOAT32RE: {
-            unsigned channel = 0;
-
-            calc_linear_float_stream_volumes(streams, nstreams, volume, spec);
-
-            while (data < end) {
-                float sum = 0;
-                unsigned i;
-
-                for (i = 0; i < nstreams; i++) {
-                    pa_mix_info *m = streams + i;
-                    float v, cv = m->linear[channel].f;
-
-                    if (PA_LIKELY(cv > 0)) {
-
-                        v = PA_FLOAT32_SWAP(*(float*) m->ptr);
-                        v *= cv;
-                        sum += v;
-                    }
-                    m->ptr = (uint8_t*) m->ptr + sizeof(float);
-                }
-
-                *((float*) data) = PA_FLOAT32_SWAP(sum);
-
-                data = (uint8_t*) data + sizeof(float);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        default:
-            pa_log_error("Unable to mix audio data of format %s.", pa_sample_format_to_string(spec->format));
-            pa_assert_not_reached();
-    }
-
-    for (k = 0; k < nstreams; k++)
-        pa_memblock_release(streams[k].chunk.memblock);
-
-    return length;
-}
-
-typedef union {
-  float f;
-  uint32_t i;
-} volume_val;
-
-typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume);
-
-static const pa_calc_volume_func_t calc_volume_table[] = {
-  [PA_SAMPLE_U8]        = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_ALAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_ULAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_S16LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_S16BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_FLOAT32LE] = (pa_calc_volume_func_t) calc_linear_float_volume,
-  [PA_SAMPLE_FLOAT32BE] = (pa_calc_volume_func_t) calc_linear_float_volume,
-  [PA_SAMPLE_S32LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_S32BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_S24LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_S24BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_S24_32LE]  = (pa_calc_volume_func_t) calc_linear_integer_volume,
-  [PA_SAMPLE_S24_32BE]  = (pa_calc_volume_func_t) calc_linear_integer_volume
-};
-
-void pa_volume_memchunk(
-        pa_memchunk*c,
-        const pa_sample_spec *spec,
-        const pa_cvolume *volume) {
-
-    void *ptr;
-    volume_val linear[PA_CHANNELS_MAX + VOLUME_PADDING];
-    pa_do_volume_func_t do_volume;
-
-    pa_assert(c);
-    pa_assert(spec);
-    pa_assert(pa_sample_spec_valid(spec));
-    pa_assert(pa_frame_aligned(c->length, spec));
-    pa_assert(volume);
-
-    if (pa_memblock_is_silence(c->memblock))
-        return;
-
-    if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_NORM))
-        return;
-
-    if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_MUTED)) {
-        pa_silence_memchunk(c, spec);
-        return;
-    }
-
-    do_volume = pa_get_volume_func(spec->format);
-    pa_assert(do_volume);
-
-    calc_volume_table[spec->format] ((void *)linear, volume);
-
-    ptr = pa_memblock_acquire_chunk(c);
-
-    do_volume (ptr, (void *)linear, spec->channels, c->length);
-
-    pa_memblock_release(c->memblock);
-}
-
 size_t pa_frame_align(size_t l, const pa_sample_spec *ss) {
     size_t fs;
 
diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h
index a6b4600..d308caa 100644
--- a/src/pulsecore/sample-util.h
+++ b/src/pulsecore/sample-util.h
@@ -44,34 +44,6 @@ pa_memblock* pa_silence_memblock(pa_memblock *b, const pa_sample_spec *spec);
 
 pa_memchunk* pa_silence_memchunk_get(pa_silence_cache *cache, pa_mempool *pool, pa_memchunk* ret, const pa_sample_spec *spec, size_t length);
 
-typedef struct pa_mix_info {
-    pa_memchunk chunk;
-    pa_cvolume volume;
-    void *userdata;
-
-    /* The following fields are used internally by pa_mix(), should
-     * not be initialised by the caller of pa_mix(). */
-    void *ptr;
-    union {
-        int32_t i;
-        float f;
-    } linear[PA_CHANNELS_MAX];
-} pa_mix_info;
-
-size_t pa_mix(
-    pa_mix_info channels[],
-    unsigned nchannels,
-    void *data,
-    size_t length,
-    const pa_sample_spec *spec,
-    const pa_cvolume *volume,
-    pa_bool_t mute);
-
-void pa_volume_memchunk(
-    pa_memchunk*c,
-    const pa_sample_spec *spec,
-    const pa_cvolume *volume);
-
 size_t pa_frame_align(size_t l, const pa_sample_spec *ss) PA_GCC_PURE;
 
 pa_bool_t pa_frame_aligned(size_t l, const pa_sample_spec *ss) PA_GCC_PURE;
diff --git a/src/pulsecore/sink-input.c b/src/pulsecore/sink-input.c
index ae89aed..a6ddb15 100644
--- a/src/pulsecore/sink-input.c
+++ b/src/pulsecore/sink-input.c
@@ -32,7 +32,7 @@
 #include <pulse/util.h>
 #include <pulse/internal.h>
 
-#include <pulsecore/sample-util.h>
+#include <pulsecore/mix.h>
 #include <pulsecore/core-subscribe.h>
 #include <pulsecore/log.h>
 #include <pulsecore/play-memblockq.h>
diff --git a/src/pulsecore/sink.c b/src/pulsecore/sink.c
index a8a91d6..175cfe5 100644
--- a/src/pulsecore/sink.c
+++ b/src/pulsecore/sink.c
@@ -42,6 +42,7 @@
 #include <pulsecore/namereg.h>
 #include <pulsecore/core-util.h>
 #include <pulsecore/sample-util.h>
+#include <pulsecore/mix.h>
 #include <pulsecore/core-subscribe.h>
 #include <pulsecore/log.h>
 #include <pulsecore/macro.h>
diff --git a/src/pulsecore/sound-file-stream.c b/src/pulsecore/sound-file-stream.c
index 24d3314..4101dea 100644
--- a/src/pulsecore/sound-file-stream.c
+++ b/src/pulsecore/sound-file-stream.c
@@ -39,6 +39,7 @@
 #include <pulsecore/log.h>
 #include <pulsecore/thread-mq.h>
 #include <pulsecore/core-util.h>
+#include <pulsecore/mix.h>
 #include <pulsecore/sndfile-util.h>
 
 #include "sound-file-stream.h"
diff --git a/src/pulsecore/source-output.c b/src/pulsecore/source-output.c
index 8775c1e..0f1a946 100644
--- a/src/pulsecore/source-output.c
+++ b/src/pulsecore/source-output.c
@@ -32,7 +32,7 @@
 #include <pulse/util.h>
 #include <pulse/internal.h>
 
-#include <pulsecore/sample-util.h>
+#include <pulsecore/mix.h>
 #include <pulsecore/core-subscribe.h>
 #include <pulsecore/log.h>
 #include <pulsecore/namereg.h>
diff --git a/src/pulsecore/source.c b/src/pulsecore/source.c
index 8c16606..f336119 100644
--- a/src/pulsecore/source.c
+++ b/src/pulsecore/source.c
@@ -40,7 +40,7 @@
 #include <pulsecore/namereg.h>
 #include <pulsecore/core-subscribe.h>
 #include <pulsecore/log.h>
-#include <pulsecore/sample-util.h>
+#include <pulsecore/mix.h>
 #include <pulsecore/flist.h>
 
 #include "source.h"
diff --git a/src/tests/mix-test.c b/src/tests/mix-test.c
index e5190c7..4980dd0 100644
--- a/src/tests/mix-test.c
+++ b/src/tests/mix-test.c
@@ -33,6 +33,7 @@
 #include <pulsecore/endianmacros.h>
 #include <pulsecore/memblock.h>
 #include <pulsecore/sample-util.h>
+#include <pulsecore/mix.h>
 
 
 /* PA_SAMPLE_U8 */

commit bc8b6eaf95c0b1343194b273ae2f75a6aaca6add
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:26:58 2013 +0100

    tests: Volume-test seems to be long-running, set timeout
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/tests/volume-test.c b/src/tests/volume-test.c
index f47994d..d8d2149 100644
--- a/src/tests/volume-test.c
+++ b/src/tests/volume-test.c
@@ -143,6 +143,7 @@ int main(int argc, char *argv[]) {
     s = suite_create("Volume");
     tc = tcase_create("volume");
     tcase_add_test(tc, volume_test);
+    tcase_set_timeout(tc, 120);
     suite_add_tcase(s, tc);
 
     sr = srunner_create(s);

commit 30ce3a14e5ae1cd316a18bec95b831c07ac57a1a
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:26:57 2013 +0100

    resampler: Resample first followed by remapping if have more out channels than in channels
    
    The patch intends to reduce computational load when resampling AND remapping. The PA
    resampler performs the following steps:
    
    sample format conversion -> remapping -> resampling -> sample format conversion
    
    In case the number of output channels is higher than the number of input channels, the
    resampler has to be run more often than necessary. E.g. in case of mono to 4-channel remapping,
    the resampler runs on 4 channels separately.
    
    To improve this, the PA resampler pipeline is made adaptive:
    
    if out-channels <= in-channels:
    sample format conversion -> remapping -> resampling -> sample format conversion
    if out-channels > in-channels:
    sample format conversion -> resampling -> remapping -> sample format conversion
    
    Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>

diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 649294d..0a3a678 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -67,6 +67,7 @@ struct pa_resampler {
     bool remap_buf_contains_leftover_data;
 
     pa_sample_format_t work_format;
+    uint8_t work_channels;
 
     pa_convert_func_t to_work_format_func;
     pa_convert_func_t from_work_format_func;
@@ -328,6 +329,16 @@ pa_resampler* pa_resampler_new(
         }
     }
 
+    if (r->o_ss.channels <= r->i_ss.channels)
+        r->work_channels = r->o_ss.channels;
+    else
+        r->work_channels = r->i_ss.channels;
+
+    pa_log_debug("Resampler:\n  rate %d -> %d (method %s),\n  format %s -> %s (intermediate %s),\n  channels %d -> %d (resampling %d)",
+        a->rate, b->rate, pa_resample_method_to_string(r->method),
+        pa_sample_format_to_string(a->format), pa_sample_format_to_string(b->format), pa_sample_format_to_string(r->work_format),
+        a->channels, b->channels, r->work_channels);
+
     /* initialize implementation */
     if (init_table[method](r) < 0)
         goto fail;
@@ -1140,10 +1151,10 @@ static pa_memchunk *resample(pa_resampler *r, pa_memchunk *input) {
         return input;
 
     in_n_samples = (unsigned) (input->length / r->w_sz);
-    in_n_frames = (unsigned) (in_n_samples / r->o_ss.channels);
+    in_n_frames = (unsigned) (in_n_samples / r->work_channels);
 
     out_n_frames = ((in_n_frames*r->o_ss.rate)/r->i_ss.rate)+EXTRA_FRAMES;
-    out_n_samples = out_n_frames * r->o_ss.channels;
+    out_n_samples = out_n_frames * r->work_channels;
 
     r->resample_buf.index = 0;
     r->resample_buf.length = r->w_sz * out_n_samples;
@@ -1157,7 +1168,7 @@ static pa_memchunk *resample(pa_resampler *r, pa_memchunk *input) {
     }
 
     r->impl_resample(r, input, in_n_frames, &r->resample_buf, &out_n_frames);
-    r->resample_buf.length = out_n_frames * r->w_sz * r->o_ss.channels;
+    r->resample_buf.length = out_n_frames * r->w_sz * r->work_channels;
 
     return &r->resample_buf;
 }
@@ -1210,8 +1221,15 @@ void pa_resampler_run(pa_resampler *r, const pa_memchunk *in, pa_memchunk *out)
 
     buf = (pa_memchunk*) in;
     buf = convert_to_work_format(r, buf);
-    buf = remap_channels(r, buf);
-    buf = resample(r, buf);
+    /* Try to save resampling effort: if we have more output channels than
+     * input channels, do resampling first, then remapping. */
+    if (r->o_ss.channels <= r->i_ss.channels) {
+        buf = remap_channels(r, buf);
+        buf = resample(r, buf);
+    } else {
+        buf = resample(r, buf);
+        buf = remap_channels(r, buf);
+    }
 
     if (buf->length) {
         buf = convert_from_work_format(r, buf);
@@ -1276,8 +1294,8 @@ static void libsamplerate_resample(pa_resampler *r, const pa_memchunk *input, un
     pa_assert_se(src_process(r->src.state, &data) == 0);
 
     if (data.input_frames_used < in_n_frames) {
-        void *leftover_data = data.data_in + data.input_frames_used * r->o_ss.channels;
-        size_t leftover_length = (in_n_frames - data.input_frames_used) * sizeof(float) * r->o_ss.channels;
+        void *leftover_data = data.data_in + data.input_frames_used * r->work_channels;
+        size_t leftover_length = (in_n_frames - data.input_frames_used) * sizeof(float) * r->work_channels;
 
         save_leftover(r, leftover_data, leftover_length);
     }
@@ -1413,7 +1431,7 @@ static int speex_init(pa_resampler *r) {
 
     pa_log_info("Choosing speex quality setting %i.", q);
 
-    if (!(r->speex.state = speex_resampler_init(r->o_ss.channels, r->i_ss.rate, r->o_ss.rate, q, &err)))
+    if (!(r->speex.state = speex_resampler_init(r->work_channels, r->i_ss.rate, r->o_ss.rate, q, &err)))
         return -1;
 
     return 0;
@@ -1432,7 +1450,7 @@ static void trivial_resample(pa_resampler *r, const pa_memchunk *input, unsigned
     pa_assert(output);
     pa_assert(out_n_frames);
 
-    fz = r->w_sz * r->o_ss.channels;
+    fz = r->w_sz * r->work_channels;
 
     src = pa_memblock_acquire_chunk(input);
     dst = pa_memblock_acquire_chunk(output);
@@ -1616,7 +1634,7 @@ static void ffmpeg_resample(pa_resampler *r, const pa_memchunk *input, unsigned
     pa_assert(output);
     pa_assert(out_n_frames);
 
-    for (c = 0; c < r->o_ss.channels; c++) {
+    for (c = 0; c < r->work_channels; c++) {
         unsigned u;
         pa_memblock *b, *w;
         int16_t *p, *t, *k, *q, *s;
@@ -1631,7 +1649,7 @@ static void ffmpeg_resample(pa_resampler *r, const pa_memchunk *input, unsigned
         k = p;
         for (u = 0; u < in_n_frames; u++) {
             *k = *t;
-            t += r->o_ss.channels;
+            t += r->work_channels;
             k ++;
         }
         pa_memblock_release(input->memblock);
@@ -1645,7 +1663,7 @@ static void ffmpeg_resample(pa_resampler *r, const pa_memchunk *input, unsigned
                                              q, p,
                                              &consumed_frames,
                                              (int) in_n_frames, (int) *out_n_frames,
-                                             c >= (unsigned) (r->o_ss.channels-1));
+                                             c >= (unsigned) (r->work_channels-1));
 
         pa_memblock_release(b);
         pa_memblock_unref(b);
@@ -1659,7 +1677,7 @@ static void ffmpeg_resample(pa_resampler *r, const pa_memchunk *input, unsigned
         for (u = 0; u < used_frames; u++) {
             *s = *q;
             q++;
-            s += r->o_ss.channels;
+            s += r->work_channels;
         }
         pa_memblock_release(output->memblock);
         pa_memblock_release(w);

commit 505a57d32d8c49ddfeca928ed988ba3b279aff3e
Author: Tanu Kaskinen <tanuk at iki.fi>
Date:   Fri Feb 15 21:24:36 2013 +0200

    echo-cancel: Fix uninitialized variable dotp_xf_xf of AEC struct
    
    Initialize the variable to zero by using pa_xnew0() instead of
    pa_xnew(). This also allows us to remove a bunch of other zero
    initialization statements.
    
    Reported-by: Peter Meerwald <p.meerwald at bct-electronic.com>

diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c
index baf59cf..67a2794 100644
--- a/src/modules/echo-cancel/adrian-aec.c
+++ b/src/modules/echo-cancel/adrian-aec.c
@@ -70,13 +70,8 @@ static REAL dotp_sse(REAL a[], REAL b[])
 
 AEC* AEC_init(int RATE, int have_vector)
 {
-  AEC *a = pa_xnew(AEC, 1);
-  a->hangover = 0;
-  memset(a->x, 0, sizeof(a->x));
-  memset(a->xf, 0, sizeof(a->xf));
-  memset(a->w_arr, 0, sizeof(a->w_arr));
+  AEC *a = pa_xnew0(AEC, 1);
   a->j = NLMS_EXT;
-  a->delta = 0.0f;
   AEC_setambient(a, NoiseFloor);
   a->dfast = a->dslow = M75dB_PCM;
   a->xfast = a->xslow = M80dB_PCM;
@@ -90,8 +85,6 @@ AEC* AEC_init(int RATE, int have_vector)
   a->aes_y2 = M0dB;
 
   a->fdwdisplay = -1;
-  a->dumpcnt = 0;
-  memset(a->ws, 0, sizeof(a->ws));
 
   if (have_vector) {
       /* Get a 16-byte aligned location */

commit 90276fe18d60455cbfdc60e3f0af851e22956f2f
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:26:55 2013 +0100

    echo-cancel: Use proper float constants in adrian-aec
    
    Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>

diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c
index 30303fd..baf59cf 100644
--- a/src/modules/echo-cancel/adrian-aec.c
+++ b/src/modules/echo-cancel/adrian-aec.c
@@ -33,7 +33,7 @@
 /* Vector Dot Product */
 static REAL dotp(REAL a[], REAL b[])
 {
-  REAL sum0 = 0.0, sum1 = 0.0;
+  REAL sum0 = 0.0f, sum1 = 0.0f;
   int j;
 
   for (j = 0; j < NLMS_LEN; j += 2) {
@@ -138,11 +138,11 @@ static float AEC_dtd(AEC *a, REAL d, REAL x)
   a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
 
   if (a->xfast < M70dB_PCM) {
-    return 0.0;   // no Spk signal
+    return 0.0f;   // no Spk signal
   }
 
   if (a->dfast < M70dB_PCM) {
-    return 0.0;   // no Mic signal
+    return 0.0f;   // no Mic signal
   }
 
   // ratio of NFRs
@@ -206,7 +206,7 @@ static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
   // optimize: iterative dotp(xf, xf)
   a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
 
-  if (stepsize > 0.0) {
+  if (stepsize > 0.0f) {
     // calculate variable step size
     REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
 

commit 8d9c41e84c01c862aa8e942eba97c3ceba185e71
Author: Tanu Kaskinen <tanuk at iki.fi>
Date:   Fri Feb 15 21:16:37 2013 +0200

    echo-cancel: Fix tap weights array alignment

diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c
index 7a7bce4..30303fd 100644
--- a/src/modules/echo-cancel/adrian-aec.c
+++ b/src/modules/echo-cancel/adrian-aec.c
@@ -95,7 +95,7 @@ AEC* AEC_init(int RATE, int have_vector)
 
   if (have_vector) {
       /* Get a 16-byte aligned location */
-      a->w = (REAL *) (((uintptr_t) a->w_arr) + (((uintptr_t) a->w_arr) % 16));
+      a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16);
       a->dotp = dotp_sse;
   } else {
       /* We don't care about alignment, just use the array as-is */

commit 764eabd1092c825d0633f7eb471ca2822e91994a
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:26:54 2013 +0100

    echo-cancel: Fix zeroing of w in AEC_leaky()
    
    bug probably caused by alignment requirement; a->w is a pointer, so sizeof(a->w) is only the pointer size, whereas sizeof(a->w_arr) is the size of the whole array
    
    Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>

diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c
index 73505a0..7a7bce4 100644
--- a/src/modules/echo-cancel/adrian-aec.c
+++ b/src/modules/echo-cancel/adrian-aec.c
@@ -174,7 +174,7 @@ static void AEC_leaky(AEC *a)
     } else if (1 == a->hangover) {
       --(a->hangover);
       // My Leaky NLMS is to erase vector w when hangover expires
-      memset(a->w, 0, sizeof(a->w));
+      memset(a->w_arr, 0, sizeof(a->w_arr));
     }
   }
 }

commit bf29c8dcf71d26c493f24eb12b1a32efff4c2a0b
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:26:53 2013 +0100

    echo-cancel: Fix memory leak / deinitialization of Adrian AEC
    
    was simply absent
    
    Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>

diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c
index 1476ee4..73505a0 100644
--- a/src/modules/echo-cancel/adrian-aec.c
+++ b/src/modules/echo-cancel/adrian-aec.c
@@ -106,6 +106,17 @@ AEC* AEC_init(int RATE, int have_vector)
   return a;
 }
 
+void AEC_done(AEC *a) {
+    pa_assert(a);
+
+    pa_xfree(a->Fx);
+    pa_xfree(a->Fe);
+    pa_xfree(a->acMic);
+    pa_xfree(a->acSpk);
+    pa_xfree(a->cutoff);
+    pa_xfree(a);
+}
+
 // Adrian soft decision DTD
 // (Dual Average Near-End to Far-End signal Ratio DTD)
 // This algorithm uses exponential smoothing with differnt
diff --git a/src/modules/echo-cancel/adrian-aec.h b/src/modules/echo-cancel/adrian-aec.h
index e733f83..6271774 100644
--- a/src/modules/echo-cancel/adrian-aec.h
+++ b/src/modules/echo-cancel/adrian-aec.h
@@ -351,7 +351,8 @@ static  void AEC_leaky(AEC *a);
  */
 static  REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize);
 
-  AEC* AEC_init(int RATE, int have_vector);
+AEC* AEC_init(int RATE, int have_vector);
+void AEC_done(AEC *a);
 
 /* Acoustic Echo Cancellation and Suppression of one sample
  * in   d:  microphone signal with echo
diff --git a/src/modules/echo-cancel/adrian.c b/src/modules/echo-cancel/adrian.c
index 91e3b35..40e9654 100644
--- a/src/modules/echo-cancel/adrian.c
+++ b/src/modules/echo-cancel/adrian.c
@@ -111,6 +111,8 @@ void pa_adrian_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *
 }
 
 void pa_adrian_ec_done(pa_echo_canceller *ec) {
-    pa_xfree(ec->params.priv.adrian.aec);
-    ec->params.priv.adrian.aec = NULL;
+    if (ec->params.priv.adrian.aec) {
+        AEC_done(ec->params.priv.adrian.aec);
+        ec->params.priv.adrian.aec = NULL;
+    }
 }
diff --git a/src/modules/echo-cancel/adrian.h b/src/modules/echo-cancel/adrian.h
index 639fa9e..4ace392 100644
--- a/src/modules/echo-cancel/adrian.h
+++ b/src/modules/echo-cancel/adrian.h
@@ -28,4 +28,5 @@
 typedef struct AEC AEC;
 
 AEC* AEC_init(int RATE, int have_vector);
+void AEC_done(AEC *a);
 int AEC_doAEC(AEC *a, int d_, int x_);

commit c43e88553735d480aaf0b1525c64a37beb1d38f4
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:26:52 2013 +0100

    echo-cancel: Output echo canceller name if invalid
    
    Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>

diff --git a/src/modules/echo-cancel/module-echo-cancel.c b/src/modules/echo-cancel/module-echo-cancel.c
index 4a36bb0..8ce6330 100644
--- a/src/modules/echo-cancel/module-echo-cancel.c
+++ b/src/modules/echo-cancel/module-echo-cancel.c
@@ -1623,7 +1623,7 @@ static int init_common(pa_modargs *ma, struct userdata *u, pa_sample_spec *sourc
 
     ec_string = pa_modargs_get_value(ma, "aec_method", DEFAULT_ECHO_CANCELLER);
     if ((ec_method = get_ec_method_from_string(ec_string)) < 0) {
-        pa_log("Invalid echo canceller implementation");
+        pa_log("Invalid echo canceller implementation '%s'", ec_string);
         goto fail;
     }
 

commit db7415b7e958d2180db6385c76d5dea5ccbbe156
Author: Peter Meerwald <p.meerwald at bct-electronic.com>
Date:   Wed Feb 13 17:26:51 2013 +0100

    echo-cancel: Add function pa_echo_canceller_blocksize_power2()
    
    computes EC block size in frames (rounded down to nearest power-of-2) based
    on sample rate and milliseconds
    
    move code from speex AEC implementation to module-echo-cancel such that
    functionality can be reused by other AEC implementations
    
    Signed-off-by: Peter Meerwald <p.meerwald at bct-electronic.com>

diff --git a/src/modules/echo-cancel/echo-cancel.h b/src/modules/echo-cancel/echo-cancel.h
index e7eed30..c33b1ef 100644
--- a/src/modules/echo-cancel/echo-cancel.h
+++ b/src/modules/echo-cancel/echo-cancel.h
@@ -131,6 +131,10 @@ struct pa_echo_canceller {
 void pa_echo_canceller_get_capture_volume(pa_echo_canceller *ec, pa_cvolume *v);
 void pa_echo_canceller_set_capture_volume(pa_echo_canceller *ec, pa_cvolume *v);
 
+/* Computes EC block size in frames (rounded down to nearest power-of-2) based
+ * on sample rate and milliseconds. */
+uint32_t pa_echo_canceller_blocksize_power2(unsigned rate, unsigned ms);
+
 /* Null canceller functions */
 pa_bool_t pa_null_ec_init(pa_core *c, pa_echo_canceller *ec,
                            pa_sample_spec *source_ss, pa_channel_map *source_map,
diff --git a/src/modules/echo-cancel/module-echo-cancel.c b/src/modules/echo-cancel/module-echo-cancel.c
index 11ad1de..4a36bb0 100644
--- a/src/modules/echo-cancel/module-echo-cancel.c
+++ b/src/modules/echo-cancel/module-echo-cancel.c
@@ -1569,6 +1569,21 @@ void pa_echo_canceller_set_capture_volume(pa_echo_canceller *ec, pa_cvolume *v)
     }
 }
 
+uint32_t pa_echo_canceller_blocksize_power2(unsigned rate, unsigned ms) {
+    unsigned nframes = (rate * ms) / 1000;
+    uint32_t y = 1 << ((8 * sizeof(uint32_t)) - 2);
+
+    assert(rate >= 4000);
+    assert(ms >= 1);
+
+    /* nframes should be a power of 2, round down to nearest power of two */
+    while (y > nframes)
+        y >>= 1;
+
+    assert(y >= 1);
+    return y;
+}
+
 static pa_echo_canceller_method_t get_ec_method_from_string(const char *method) {
     if (pa_streq(method, "null"))
         return PA_ECHO_CANCELLER_NULL;
diff --git a/src/modules/echo-cancel/speex.c b/src/modules/echo-cancel/speex.c
index 6c532f2..9469092 100644
--- a/src/modules/echo-cancel/speex.c
+++ b/src/modules/echo-cancel/speex.c
@@ -151,7 +151,7 @@ pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
                            uint32_t *nframes, const char *args)
 {
     int rate;
-    uint32_t y, frame_size_ms, filter_size_ms;
+    uint32_t frame_size_ms, filter_size_ms;
     pa_modargs *ma;
 
     if (!(ma = pa_modargs_new(args, valid_modargs))) {
@@ -174,16 +174,10 @@ pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
     pa_speex_ec_fixate_spec(source_ss, source_map, sink_ss, sink_map);
 
     rate = source_ss->rate;
-    *nframes = (rate * frame_size_ms) / 1000;
-    /* nframes should be a power of 2, round down to nearest power of two */
-    y = 1 << ((8 * sizeof (uint32_t)) - 2);
-    while (y > *nframes)
-      y >>= 1;
-    *nframes = y;
+    *nframes = pa_echo_canceller_blocksize_power2(rate, frame_size_ms);
 
     pa_log_debug ("Using nframes %d, channels %d, rate %d", *nframes, source_ss->channels, source_ss->rate);
-
-    ec->params.priv.speex.state = speex_echo_state_init_mc (*nframes, (rate * filter_size_ms) / 1000, source_ss->channels, source_ss->channels);
+    ec->params.priv.speex.state = speex_echo_state_init_mc(*nframes, (rate * filter_size_ms) / 1000, source_ss->channels, source_ss->channels);
 
     if (!ec->params.priv.speex.state)
         goto fail;

commit e845c86c64e55c4596d5e72ddb42e995cc6b1c05
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:26:49 2013 +0100

    build-sys: Properly check for HAVE_DBUS in module-ladspa-sink
    
    prevents
      CC     module_ladspa_sink_la-module-ladspa-sink.lo
    modules/module-ladspa-sink.c:1332:5: warning: "HAVE_DBUS" is not defined
    modules/module-ladspa-sink.c:1370:5: warning: "HAVE_DBUS" is not defined
    in case HAVE_DBUS is not available
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/src/modules/module-ladspa-sink.c b/src/modules/module-ladspa-sink.c
index e18e674..b361493 100644
--- a/src/modules/module-ladspa-sink.c
+++ b/src/modules/module-ladspa-sink.c
@@ -1329,7 +1329,7 @@ int pa__init(pa_module*m) {
     pa_sink_put(u->sink);
     pa_sink_input_put(u->sink_input);
 
-#if HAVE_DBUS
+#ifdef HAVE_DBUS
     dbus_init(u);
 #endif
 
@@ -1367,7 +1367,7 @@ void pa__done(pa_module*m) {
     /* See comments in sink_input_kill_cb() above regarding
     * destruction order! */
 
-#if HAVE_DBUS
+#ifdef HAVE_DBUS
     dbus_done(u);
 #endif
 

commit 425bdc10ec811d03441340dd460b1f0b4b1bbdba
Author: Peter Meerwald <pmeerw at pmeerw.net>
Date:   Wed Feb 13 17:26:48 2013 +0100

    build-sys: ALSA use-case manager requires ALSA library >= 1.0.24
    
    alsa/use-case.h is needed
    require at least version 1.0.24 in configure.ac
    
    prevents the following error at compile time:
      CC     libalsa_util_la-alsa-util.lo
    In file included from modules/alsa/alsa-mixer.h:51,
                     from modules/alsa/alsa-util.h:36,
                     from modules/alsa/alsa-util.c:46:
    modules/alsa/alsa-ucm.h:27:22: error: use-case.h: No such file or directory
    In file included from modules/alsa/alsa-mixer.h:51,
                     from modules/alsa/alsa-util.h:36,
                     from modules/alsa/alsa-util.c:46:
    modules/alsa/alsa-ucm.h:89: error: expected ‘)’ before ‘*’ token
    modules/alsa/alsa-ucm.h:169: error: expected specifier-qualifier-list before ‘snd_use_case_mgr_t’
    make[3]: *** [libalsa_util_la-alsa-util.lo] Error 1
    
    Signed-off-by: Peter Meerwald <pmeerw at pmeerw.net>

diff --git a/configure.ac b/configure.ac
index 6d340fd..b840b81 100644
--- a/configure.ac
+++ b/configure.ac
@@ -774,11 +774,11 @@ AC_ARG_ENABLE([alsa],
     AS_HELP_STRING([--disable-alsa],[Disable optional ALSA support]))
 
 AS_IF([test "x$enable_alsa" != "xno"],
-    [PKG_CHECK_MODULES(ASOUNDLIB, [ alsa >= 1.0.19 ], HAVE_ALSA=1, HAVE_ALSA=0)],
+    [PKG_CHECK_MODULES(ASOUNDLIB, [ alsa >= 1.0.24 ], HAVE_ALSA=1, HAVE_ALSA=0)],
     HAVE_ALSA=0)
 
 AS_IF([test "x$enable_alsa" = "xyes" && test "x$HAVE_ALSA" = "x0"],
-    [AC_MSG_ERROR([*** Needed alsa >= 1.0.19 support not found])])
+    [AC_MSG_ERROR([*** Needed alsa >= 1.0.24 support not found])])
 
 AC_SUBST(ASOUNDLIB_CFLAGS)
 AC_SUBST(ASOUNDLIB_LIBS)