[pulseaudio-commits] [SCM] PulseAudio Sound Server branch, master, updated. v0.9.16-test5-76-ga0f01dd
Lennart Poettering
gitmailer-noreply at 0pointer.de
Sun Aug 23 13:20:55 PDT 2009
This is an automated email from the git hooks/post-receive script. It was
generated because of a push to the "PulseAudio Sound Server" repository.
The master branch has been updated
from d6fb8d10819bebc1cee203de7330cceeafde9fed (commit)
- Log -----------------------------------------------------------------
a0f01dd port a few things over to use xmalloc and friends instead of low-level libc malloc/free directly
ab5ac06 Merge commit 'wtay/optimize'
6076cef remap: make the MMX code prettier
6e5dbed remap: add MMX mono to stereo
e961efc remap: init the do_remap function to NULL
28baa53 remap: allow specialisations to install NULL
ac1f2e0 remap: move remapping code in separate file
a3f4a4f resamples; refactor the channel remapping bits
05fef5f sconv: allow for setting custom functions
c1b6a87 alsa-sink: reduce the amount of smoother updates
f8ffe0d svolume: cleanups
f09b511 whitespace fixes
3cc1278 resampler: avoid some multiplies when we can
aeae567 svolume: add comment
8aa86f5 arm: implement ARM cpu detection
078bde1 x86: keep the cpu flags local
370016c svolume: fix compilation in 32bits
d2389ef sample: manually inline table lookups
548b735 resampler: fix identity check
d04a6e9 resample: fix counters
b4e9942 resample: refactor the channel remapping a little
bd49d43 svolume: add CPU guards around code
951bf1b svolume: add ARM optimized volume scaling
a98fa95 svolume: remove unneeded compare
601e5f1 resampler: cache integer channel_map
25724cd Get rid of liboil
591baac volume: remove ref functions
f24c24c volume: improved comments
a123544 volume: make the benchmark more meaningful
dcae9a3 svolume: add some comments
e396fe6 cpu-x86: guard header with ifdef
563cb2d main: hook up cpu detection code
a83f552 cpu-x86: add cpu detection code and helpers
5998cf9 svolume: improve SSE and MMX code
7086784 volume_sse: add sse optimisations
08f3e16 volume_mmx: fix mmx code a bit
3a0b012 volume: add first mmx optimized function
2d73f13 samples-util: add padding to volume array
3d00896 sample-util: move volume code to separate file
e71e644 sample-util: move some functions around
5b8b654 sample-utils: coding style cleanup
26839c4 sample-utils: split out functions from case
-----------------------------------------------------------------------
Summary of changes:
configure.ac | 6 -
src/Makefile.am | 5 +
src/daemon/main.c | 9 +-
src/modules/alsa/alsa-sink.c | 26 ++-
src/pulse/sample.c | 49 ++--
src/pulsecore/core-util.c | 2 +-
src/pulsecore/cpu-arm.c | 139 +++++++
.../alsa/alsa-source.h => pulsecore/cpu-arm.h} | 26 +-
src/pulsecore/cpu-x86.c | 122 ++++++
src/pulsecore/cpu-x86.h | 68 ++++
src/pulsecore/remap.c | 204 ++++++++++
src/{modules/oss/oss-util.h => pulsecore/remap.h} | 31 +-
src/pulsecore/remap_mmx.c | 148 ++++++++
src/pulsecore/resampler.c | 229 ++++--------
src/pulsecore/sample-util.c | 396 +++-----------------
src/pulsecore/sample-util.h | 5 +
src/pulsecore/sconv-s16le.c | 42 +--
src/pulsecore/sconv.c | 188 ++++++----
src/pulsecore/sconv.h | 6 +
src/pulsecore/svolume_arm.c | 195 ++++++++++
src/pulsecore/svolume_c.c | 335 +++++++++++++++++
src/pulsecore/svolume_mmx.c | 313 ++++++++++++++++
src/pulsecore/svolume_sse.c | 314 ++++++++++++++++
src/tests/envelope-test.c | 3 -
src/tests/mix-test.c | 3 -
src/tests/remix-test.c | 3 -
src/tests/resampler-test.c | 3 -
27 files changed, 2200 insertions(+), 670 deletions(-)
create mode 100644 src/pulsecore/cpu-arm.c
copy src/{modules/alsa/alsa-source.h => pulsecore/cpu-arm.h} (60%)
create mode 100644 src/pulsecore/cpu-x86.c
create mode 100644 src/pulsecore/cpu-x86.h
create mode 100644 src/pulsecore/remap.c
copy src/{modules/oss/oss-util.h => pulsecore/remap.h} (53%)
create mode 100644 src/pulsecore/remap_mmx.c
create mode 100644 src/pulsecore/svolume_arm.c
create mode 100644 src/pulsecore/svolume_c.c
create mode 100644 src/pulsecore/svolume_mmx.c
create mode 100644 src/pulsecore/svolume_sse.c
-----------------------------------------------------------------------
commit 26839c4b9eb549eebf8db6eae2399ed6fd94efa8
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Tue Aug 11 15:15:57 2009 +0200
sample-utils: split out functions from case
Move the volume functions out of the switch case and use a table indexed by the
sample format to find the volume function.
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index 5b8ccf5..ef43567 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -690,361 +690,381 @@ size_t pa_mix(
return length;
}
+typedef struct pa_volume_funcs {
+ void (*u8) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*alaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*ulaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*s16ne) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*s16re) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*float32ne) (float *samples, float *volumes, unsigned channels, unsigned length);
+ void (*float32re) (float *samples, float *volumes, unsigned channels, unsigned length);
+ void (*s32ne) (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*s32re) (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*s24ne) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*s24re) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*s24_32ne) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+ void (*s24_32re) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
+} pa_volume_funcs;
+
+static void
+pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) *samples - 0x80;
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
+ *samples++ = (uint8_t) (t + 0x80);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
-void pa_volume_memchunk(
- pa_memchunk*c,
- const pa_sample_spec *spec,
- const pa_cvolume *volume) {
-
- void *ptr;
-
- pa_assert(c);
- pa_assert(spec);
- pa_assert(c->length % pa_frame_size(spec) == 0);
- pa_assert(volume);
-
- if (pa_memblock_is_silence(c->memblock))
- return;
-
- if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_NORM))
- return;
-
- if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_MUTED)) {
- pa_silence_memchunk(c, spec);
- return;
- }
-
- ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
-
- switch (spec->format) {
-
- case PA_SAMPLE_S16NE: {
- int16_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
-
- calc_linear_integer_volume(linear, volume);
-
- e = (int16_t*) ptr + c->length/sizeof(int16_t);
-
- for (channel = 0, d = ptr; d < e; d++) {
- int32_t t, hi, lo;
-
- /* Multiplying the 32bit volume factor with the 16bit
- * sample might result in an 48bit value. We want to
- * do without 64 bit integers and hence do the
- * multiplication independantly for the HI and LO part
- * of the volume. */
-
- hi = linear[channel] >> 16;
- lo = linear[channel] & 0xFFFF;
-
- t = (int32_t)(*d);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *d = (int16_t) t;
-
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
-
- break;
- }
-
- case PA_SAMPLE_S16RE: {
- int16_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
-
- calc_linear_integer_volume(linear, volume);
-
- e = (int16_t*) ptr + c->length/sizeof(int16_t);
-
- for (channel = 0, d = ptr; d < e; d++) {
- int32_t t, hi, lo;
+static void
+pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- hi = linear[channel] >> 16;
- lo = linear[channel] & 0xFFFF;
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- t = (int32_t) PA_INT16_SWAP(*d);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *d = PA_INT16_SWAP((int16_t) t);
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
+ t = (int32_t) st_alaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
- break;
- }
-
- case PA_SAMPLE_S32NE: {
- int32_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- calc_linear_integer_volume(linear, volume);
+static void
+pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- e = (int32_t*) ptr + c->length/sizeof(int32_t);
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- for (channel = 0, d = ptr; d < e; d++) {
- int64_t t;
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- t = (int64_t)(*d);
- t = (t * linear[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *d = (int32_t) t;
+ t = (int32_t) st_ulaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
-
- case PA_SAMPLE_S32RE: {
- int32_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
-
- calc_linear_integer_volume(linear, volume);
-
- e = (int32_t*) ptr + c->length/sizeof(int32_t);
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- for (channel = 0, d = ptr; d < e; d++) {
- int64_t t;
+static void
+pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- t = (int64_t) PA_INT32_SWAP(*d);
- t = (t * linear[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *d = PA_INT32_SWAP((int32_t) t);
+ length /= sizeof (int16_t);
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- case PA_SAMPLE_S24NE: {
- uint8_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+ /* Multiplying the 32bit volume factor with the 16bit
+ * sample might result in an 48bit value. We want to
+ * do without 64 bit integers and hence do the
+ * multiplication independantly for the HI and LO part
+ * of the volume. */
- calc_linear_integer_volume(linear, volume);
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- e = (uint8_t*) ptr + c->length;
+ t = (int32_t)(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (int16_t) t;
- for (channel = 0, d = ptr; d < e; d += 3) {
- int64_t t;
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- t = (int64_t)((int32_t) (PA_READ24NE(d) << 8));
- t = (t * linear[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24NE(d, ((uint32_t) (int32_t) t) >> 8);
+static void
+pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+ length /= sizeof (int16_t);
- case PA_SAMPLE_S24RE: {
- uint8_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- calc_linear_integer_volume(linear, volume);
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- e = (uint8_t*) ptr + c->length;
+ t = (int32_t) PA_INT16_SWAP(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = PA_INT16_SWAP((int16_t) t);
- for (channel = 0, d = ptr; d < e; d += 3) {
- int64_t t;
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- t = (int64_t)((int32_t) (PA_READ24RE(d) << 8));
- t = (t * linear[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24RE(d, ((uint32_t) (int32_t) t) >> 8);
+static void
+pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+ length /= sizeof (float);
- case PA_SAMPLE_S24_32NE: {
- uint32_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+ for (channel = 0; length; length--) {
+ *samples++ *= volumes[channel];
- calc_linear_integer_volume(linear, volume);
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- e = (uint32_t*) ptr + c->length/sizeof(uint32_t);
+static void
+pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- for (channel = 0, d = ptr; d < e; d++) {
- int64_t t;
+ length /= sizeof (float);
- t = (int64_t) ((int32_t) (*d << 8));
- t = (t * linear[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *d = ((uint32_t) ((int32_t) t)) >> 8;
+ for (channel = 0; length; length--) {
+ float t;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+ t = PA_FLOAT32_SWAP(*samples);
+ t *= volumes[channel];
+ *samples++ = PA_FLOAT32_SWAP(t);
- case PA_SAMPLE_S24_32RE: {
- uint32_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- calc_linear_integer_volume(linear, volume);
+static void
+pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- e = (uint32_t*) ptr + c->length/sizeof(uint32_t);
+ length /= sizeof (int32_t);
- for (channel = 0, d = ptr; d < e; d++) {
- int64_t t;
+ for (channel = 0; length; length--) {
+ int64_t t;
- t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*d) << 8));
- t = (t * linear[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *d = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
+ t = (int64_t)(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = (int32_t) t;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- case PA_SAMPLE_U8: {
- uint8_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+static void
+pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- calc_linear_integer_volume(linear, volume);
+ length /= sizeof (int32_t);
- e = (uint8_t*) ptr + c->length;
+ for (channel = 0; length; length--) {
+ int64_t t;
- for (channel = 0, d = ptr; d < e; d++) {
- int32_t t, hi, lo;
+ t = (int64_t) PA_INT32_SWAP(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_INT32_SWAP((int32_t) t);
- hi = linear[channel] >> 16;
- lo = linear[channel] & 0xFFFF;
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- t = (int32_t) *d - 0x80;
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
- *d = (uint8_t) (t + 0x80);
+static void
+pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+ uint8_t *e;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+ e = samples + length;
- case PA_SAMPLE_ULAW: {
- uint8_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
- calc_linear_integer_volume(linear, volume);
+ t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
- e = (uint8_t*) ptr + c->length;
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- for (channel = 0, d = ptr; d < e; d++) {
- int32_t t, hi, lo;
+static void
+pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+ uint8_t *e;
- hi = linear[channel] >> 16;
- lo = linear[channel] & 0xFFFF;
+ e = samples + length;
- t = (int32_t) st_ulaw2linear16(*d);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *d = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+ t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
- case PA_SAMPLE_ALAW: {
- uint8_t *d, *e;
- unsigned channel;
- int32_t linear[PA_CHANNELS_MAX];
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- calc_linear_integer_volume(linear, volume);
+static void
+pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- e = (uint8_t*) ptr + c->length;
+ length /= sizeof (uint32_t);
- for (channel = 0, d = ptr; d < e; d++) {
- int32_t t, hi, lo;
+ for (channel = 0; length; length--) {
+ int64_t t;
- hi = linear[channel] >> 16;
- lo = linear[channel] & 0xFFFF;
+ t = (int64_t) ((int32_t) (*samples << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
- t = (int32_t) st_alaw2linear16(*d);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *d = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
- break;
- }
+static void
+pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
- case PA_SAMPLE_FLOAT32NE: {
- float *d;
- int skip;
- unsigned n;
- unsigned channel;
+ length /= sizeof (uint32_t);
- d = ptr;
- skip = (int) (spec->channels * sizeof(float));
- n = (unsigned) (c->length/sizeof(float)/spec->channels);
+ for (channel = 0; length; length--) {
+ int64_t t;
- for (channel = 0; channel < spec->channels; channel ++) {
- float v, *t;
+ t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
- if (PA_UNLIKELY(volume->values[channel] == PA_VOLUME_NORM))
- continue;
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
- v = (float) pa_sw_volume_to_linear(volume->values[channel]);
- t = d + channel;
- oil_scalarmult_f32(t, skip, t, skip, &v, (int) n);
- }
- break;
- }
+typedef void (*pa_do_volume_func) (void *samples, void *volumes, unsigned channels, unsigned length);
+typedef void (*pa_calc_volume_func) (void *volumes, const pa_cvolume *volume);
+
+typedef union {
+ float f;
+ uint32_t i;
+} volume_val;
+
+static pa_calc_volume_func calc_volume_funcs[] =
+{
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_float_volume,
+ (pa_calc_volume_func) calc_linear_float_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume,
+ (pa_calc_volume_func) calc_linear_integer_volume
+};
+
+static pa_do_volume_func do_volume_funcs[] =
+{
+ (pa_do_volume_func) pa_volume_u8_c,
+ (pa_do_volume_func) pa_volume_alaw_c,
+ (pa_do_volume_func) pa_volume_ulaw_c,
+#ifdef WORDS_BIGENDIAN
+ (pa_do_volume_func) pa_volume_s16re_c,
+ (pa_do_volume_func) pa_volume_s16ne_c,
+ (pa_do_volume_func) pa_volume_float32re_c,
+ (pa_do_volume_func) pa_volume_float32ne_c,
+ (pa_do_volume_func) pa_volume_s32re_c,
+ (pa_do_volume_func) pa_volume_s32ne_c,
+ (pa_do_volume_func) pa_volume_s24re_c,
+ (pa_do_volume_func) pa_volume_s24ne_c,
+ (pa_do_volume_func) pa_volume_s24_32re_c
+ (pa_do_volume_func) pa_volume_s24_32ne_c,
+#else
+ (pa_do_volume_func) pa_volume_s16ne_c,
+ (pa_do_volume_func) pa_volume_s16re_c,
+ (pa_do_volume_func) pa_volume_float32ne_c,
+ (pa_do_volume_func) pa_volume_float32re_c,
+ (pa_do_volume_func) pa_volume_s32ne_c,
+ (pa_do_volume_func) pa_volume_s32re_c,
+ (pa_do_volume_func) pa_volume_s24ne_c,
+ (pa_do_volume_func) pa_volume_s24re_c,
+ (pa_do_volume_func) pa_volume_s24_32ne_c,
+ (pa_do_volume_func) pa_volume_s24_32re_c
+#endif
+};
- case PA_SAMPLE_FLOAT32RE: {
- float *d, *e;
- unsigned channel;
- float linear[PA_CHANNELS_MAX];
+void pa_volume_memchunk(
+ pa_memchunk*c,
+ const pa_sample_spec *spec,
+ const pa_cvolume *volume) {
- calc_linear_float_volume(linear, volume);
+ void *ptr;
+ volume_val linear[PA_CHANNELS_MAX];
- e = (float*) ptr + c->length/sizeof(float);
+ pa_assert(c);
+ pa_assert(spec);
+ pa_assert(c->length % pa_frame_size(spec) == 0);
+ pa_assert(volume);
- for (channel = 0, d = ptr; d < e; d++) {
- float t;
+ if (pa_memblock_is_silence(c->memblock))
+ return;
- t = PA_FLOAT32_SWAP(*d);
- t *= linear[channel];
- *d = PA_FLOAT32_SWAP(t);
+ if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_NORM))
+ return;
- if (PA_UNLIKELY(++channel >= spec->channels))
- channel = 0;
- }
+ if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_MUTED)) {
+ pa_silence_memchunk(c, spec);
+ return;
+ }
- break;
- }
+ if (spec->format < 0 || spec->format > PA_SAMPLE_MAX) {
+ pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format));
+ return;
+ }
+ ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
- default:
- pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format));
- /* If we cannot change the volume, we just don't do it */
- }
+ calc_volume_funcs[spec->format] ((void *)linear, volume);
+ do_volume_funcs[spec->format] (ptr, (void *)linear, spec->channels, c->length);
pa_memblock_release(c->memblock);
}
commit 5b8b6544e205237d41bc502a7fd9f79051af78ec
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Tue Aug 11 16:25:44 2009 +0200
sample-utils: coding style cleanup
Make the coding style match the rest of pulseaudio more.
Remove some liboil functions; they seem unoptimized and are likely slower than
our hand-rolled versions here.
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index ef43567..0d4e01e 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -30,9 +30,6 @@
#include <stdio.h>
#include <errno.h>
-#include <liboil/liboilfuncs.h>
-#include <liboil/liboil.h>
-
#include <pulse/timeval.h>
#include <pulsecore/log.h>
@@ -977,59 +974,50 @@ pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, un
}
}
-typedef void (*pa_do_volume_func) (void *samples, void *volumes, unsigned channels, unsigned length);
-typedef void (*pa_calc_volume_func) (void *volumes, const pa_cvolume *volume);
+typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length);
+typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume);
typedef union {
float f;
uint32_t i;
} volume_val;
-static pa_calc_volume_func calc_volume_funcs[] =
-{
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_float_volume,
- (pa_calc_volume_func) calc_linear_float_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume,
- (pa_calc_volume_func) calc_linear_integer_volume
+typedef struct pa_sample_func_t {
+ pa_calc_volume_func_t calc_volume;
+ pa_do_volume_func_t do_volume;
+} pa_sample_func_t;
+
+static const pa_calc_volume_func_t calc_volume_table[] = {
+ [PA_SAMPLE_U8] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_ALAW] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_ULAW] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_S16LE] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_S16BE] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_FLOAT32LE] = (pa_calc_volume_func_t) calc_linear_float_volume,
+ [PA_SAMPLE_FLOAT32BE] = (pa_calc_volume_func_t) calc_linear_float_volume,
+ [PA_SAMPLE_S32LE] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_S32BE] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_S24LE] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_S24BE] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_S24_32LE] = (pa_calc_volume_func_t) calc_linear_integer_volume,
+ [PA_SAMPLE_S24_32BE] = (pa_calc_volume_func_t) calc_linear_integer_volume
};
-static pa_do_volume_func do_volume_funcs[] =
+static pa_do_volume_func_t do_volume_table[] =
{
- (pa_do_volume_func) pa_volume_u8_c,
- (pa_do_volume_func) pa_volume_alaw_c,
- (pa_do_volume_func) pa_volume_ulaw_c,
-#ifdef WORDS_BIGENDIAN
- (pa_do_volume_func) pa_volume_s16re_c,
- (pa_do_volume_func) pa_volume_s16ne_c,
- (pa_do_volume_func) pa_volume_float32re_c,
- (pa_do_volume_func) pa_volume_float32ne_c,
- (pa_do_volume_func) pa_volume_s32re_c,
- (pa_do_volume_func) pa_volume_s32ne_c,
- (pa_do_volume_func) pa_volume_s24re_c,
- (pa_do_volume_func) pa_volume_s24ne_c,
- (pa_do_volume_func) pa_volume_s24_32re_c
- (pa_do_volume_func) pa_volume_s24_32ne_c,
-#else
- (pa_do_volume_func) pa_volume_s16ne_c,
- (pa_do_volume_func) pa_volume_s16re_c,
- (pa_do_volume_func) pa_volume_float32ne_c,
- (pa_do_volume_func) pa_volume_float32re_c,
- (pa_do_volume_func) pa_volume_s32ne_c,
- (pa_do_volume_func) pa_volume_s32re_c,
- (pa_do_volume_func) pa_volume_s24ne_c,
- (pa_do_volume_func) pa_volume_s24re_c,
- (pa_do_volume_func) pa_volume_s24_32ne_c,
- (pa_do_volume_func) pa_volume_s24_32re_c
-#endif
+ [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
+ [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
+ [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
+ [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
+ [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
+ [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
+ [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
+ [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
+ [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
+ [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
+ [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
+ [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
+ [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
};
void pa_volume_memchunk(
@@ -1063,8 +1051,8 @@ void pa_volume_memchunk(
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
- calc_volume_funcs[spec->format] ((void *)linear, volume);
- do_volume_funcs[spec->format] (ptr, (void *)linear, spec->channels, c->length);
+ calc_volume_table[spec->format] ((void *)linear, volume);
+ do_volume_table[spec->format] (ptr, (void *)linear, spec->channels, c->length);
pa_memblock_release(c->memblock);
}
@@ -1110,7 +1098,7 @@ void pa_interleave(const void *src[], unsigned channels, void *dst, size_t ss, u
d = (uint8_t*) dst + c * ss;
for (j = 0; j < n; j ++) {
- oil_memcpy(d, s, (int) ss);
+ memcpy(d, s, (int) ss);
s = (uint8_t*) s + ss;
d = (uint8_t*) d + fs;
}
@@ -1138,7 +1126,7 @@ void pa_deinterleave(const void *src, void *dst[], unsigned channels, size_t ss,
d = dst[c];
for (j = 0; j < n; j ++) {
- oil_memcpy(d, s, (int) ss);
+ memcpy(d, s, (int) ss);
s = (uint8_t*) s + fs;
d = (uint8_t*) d + ss;
}
@@ -1247,10 +1235,15 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo
s = src; d = dst;
if (format == PA_SAMPLE_FLOAT32NE) {
+ for (; n > 0; n--) {
+ float f;
- float minus_one = -1.0, plus_one = 1.0;
- oil_clip_f32(d, (int) dstr, s, (int) sstr, (int) n, &minus_one, &plus_one);
+ f = *s;
+ *d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
+ s = (const float*) ((const uint8_t*) s + sstr);
+ d = (float*) ((uint8_t*) d + dstr);
+ }
} else {
pa_assert(format == PA_SAMPLE_FLOAT32RE);
commit e71e644eb668b6336dd48d2730839aa3e9f7278e
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Tue Aug 11 16:43:46 2009 +0200
sample-util: move some functions around
Move some stuff around before splitting it into a separate file.
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index 0d4e01e..f8a4c70 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -975,17 +975,34 @@ pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, un
}
typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length);
-typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume);
+
+typedef struct pa_sample_func_t {
+ pa_do_volume_func_t do_volume;
+} pa_sample_func_t;
+
+static pa_do_volume_func_t do_volume_table[] =
+{
+ [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
+ [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
+ [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
+ [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
+ [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
+ [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
+ [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
+ [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
+ [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
+ [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
+ [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
+ [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
+ [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
+};
typedef union {
float f;
uint32_t i;
} volume_val;
-typedef struct pa_sample_func_t {
- pa_calc_volume_func_t calc_volume;
- pa_do_volume_func_t do_volume;
-} pa_sample_func_t;
+typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume);
static const pa_calc_volume_func_t calc_volume_table[] = {
[PA_SAMPLE_U8] = (pa_calc_volume_func_t) calc_linear_integer_volume,
@@ -1003,23 +1020,6 @@ static const pa_calc_volume_func_t calc_volume_table[] = {
[PA_SAMPLE_S24_32BE] = (pa_calc_volume_func_t) calc_linear_integer_volume
};
-static pa_do_volume_func_t do_volume_table[] =
-{
- [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
- [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
- [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
- [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
- [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
- [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
- [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
- [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
- [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
- [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
- [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
- [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
- [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
-};
-
void pa_volume_memchunk(
pa_memchunk*c,
const pa_sample_spec *spec,
commit 3d008961c095cf8d41d2c61d13d446c98c892136
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Tue Aug 11 17:10:44 2009 +0200
sample-util: move volume code to separate file
Move the volume code into a separate file with the reference C implementations.
Add a function to retrieve the volume function and one to install a new one.
diff --git a/src/Makefile.am b/src/Makefile.am
index 17011cd..fc5d39f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -825,6 +825,7 @@ libpulsecore_ at PA_MAJORMINORMICRO@_la_SOURCES = \
pulsecore/resampler.c pulsecore/resampler.h \
pulsecore/rtpoll.c pulsecore/rtpoll.h \
pulsecore/sample-util.c pulsecore/sample-util.h \
+ pulsecore/svolume_c.c \
pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \
pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \
pulsecore/sconv.c pulsecore/sconv.h \
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index f8a4c70..0bbd519 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -687,316 +687,6 @@ size_t pa_mix(
return length;
}
-typedef struct pa_volume_funcs {
- void (*u8) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*alaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*ulaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*s16ne) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*s16re) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*float32ne) (float *samples, float *volumes, unsigned channels, unsigned length);
- void (*float32re) (float *samples, float *volumes, unsigned channels, unsigned length);
- void (*s32ne) (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*s32re) (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*s24ne) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*s24re) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*s24_32ne) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
- void (*s24_32re) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length);
-} pa_volume_funcs;
-
-static void
-pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) *samples - 0x80;
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
- *samples++ = (uint8_t) (t + 0x80);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) st_alaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) st_ulaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int16_t);
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- /* Multiplying the 32bit volume factor with the 16bit
- * sample might result in an 48bit value. We want to
- * do without 64 bit integers and hence do the
- * multiplication independantly for the HI and LO part
- * of the volume. */
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t)(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (int16_t) t;
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int16_t);
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) PA_INT16_SWAP(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = PA_INT16_SWAP((int16_t) t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (float);
-
- for (channel = 0; length; length--) {
- *samples++ *= volumes[channel];
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (float);
-
- for (channel = 0; length; length--) {
- float t;
-
- t = PA_FLOAT32_SWAP(*samples);
- t *= volumes[channel];
- *samples++ = PA_FLOAT32_SWAP(t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t)(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = (int32_t) t;
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) PA_INT32_SWAP(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_INT32_SWAP((int32_t) t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
- uint8_t *e;
-
- e = samples + length;
-
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
-
- t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
- uint8_t *e;
-
- e = samples + length;
-
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
-
- t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (uint32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) ((int32_t) (*samples << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (uint32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length);
-
-typedef struct pa_sample_func_t {
- pa_do_volume_func_t do_volume;
-} pa_sample_func_t;
-
-static pa_do_volume_func_t do_volume_table[] =
-{
- [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
- [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
- [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
- [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
- [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
- [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
- [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
- [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
- [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
- [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
- [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
- [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
- [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
-};
-
typedef union {
float f;
uint32_t i;
@@ -1027,6 +717,7 @@ void pa_volume_memchunk(
void *ptr;
volume_val linear[PA_CHANNELS_MAX];
+ pa_do_volume_func_t do_volume;
pa_assert(c);
pa_assert(spec);
@@ -1051,8 +742,11 @@ void pa_volume_memchunk(
ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
+ do_volume = pa_get_volume_func (spec->format);
+ pa_assert(do_volume);
+
calc_volume_table[spec->format] ((void *)linear, volume);
- do_volume_table[spec->format] (ptr, (void *)linear, spec->channels, c->length);
+ do_volume (ptr, (void *)linear, spec->channels, c->length);
pa_memblock_release(c->memblock);
}
diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h
index 6a306c1..278b88b 100644
--- a/src/pulsecore/sample-util.h
+++ b/src/pulsecore/sample-util.h
@@ -86,6 +86,12 @@ void pa_memchunk_dump_to_file(pa_memchunk *c, const char *fn);
void pa_memchunk_sine(pa_memchunk *c, pa_mempool *pool, unsigned rate, unsigned freq);
+typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length);
+
+pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f);
+void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func);
+
+
#define PA_CHANNEL_POSITION_MASK_LEFT \
(PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \
| PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_REAR_LEFT) \
diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c
new file mode 100644
index 0000000..2148a57
--- /dev/null
+++ b/src/pulsecore/svolume_c.c
@@ -0,0 +1,335 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2006 Pierre Ossman <ossman at cendio.se> for Cendio AB
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+static void
+pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) *samples - 0x80;
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
+ *samples++ = (uint8_t) (t + 0x80);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) st_alaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) st_ulaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (int16_t);
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ /* Multiplying the 32bit volume factor with the 16bit
+ * sample might result in an 48bit value. We want to
+ * do without 64 bit integers and hence do the
+ * multiplication independantly for the HI and LO part
+ * of the volume. */
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t)(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (int16_t) t;
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (int16_t);
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) PA_INT16_SWAP(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = PA_INT16_SWAP((int16_t) t);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (float);
+
+ for (channel = 0; length; length--) {
+ *samples++ *= volumes[channel];
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (float);
+
+ for (channel = 0; length; length--) {
+ float t;
+
+ t = PA_FLOAT32_SWAP(*samples);
+ t *= volumes[channel];
+ *samples++ = PA_FLOAT32_SWAP(t);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (int32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t)(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = (int32_t) t;
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (int32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t) PA_INT32_SWAP(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_INT32_SWAP((int32_t) t);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+ uint8_t *e;
+
+ e = samples + length;
+
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
+
+ t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+ uint8_t *e;
+
+ e = samples + length;
+
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
+
+ t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (uint32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t) ((int32_t) (*samples << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (uint32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static pa_do_volume_func_t do_volume_table[] =
+{
+ [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
+ [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
+ [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
+ [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
+ [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
+ [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
+ [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
+ [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
+ [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
+ [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
+ [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
+ [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
+ [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
+};
+
+pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) {
+ pa_assert(f >= 0);
+ pa_assert(f < PA_SAMPLE_MAX);
+
+ return do_volume_table[f];
+}
+
+void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func) {
+ pa_assert(f >= 0);
+ pa_assert(f < PA_SAMPLE_MAX);
+
+ do_volume_table[f] = func;
+}
commit 2d73f13567ad03efe798d07eda87fa776b0505f2
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 12 17:03:30 2009 +0200
sample-util: add padding to volume array
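Pad the volume array with VOLUME_PADDING extra entries that repeat the per-channel
volumes from the start of the array. We'll need this later to be able to write
optimized functions that read past the last channel.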
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index 0bbd519..677f914 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -103,24 +103,36 @@ void* pa_silence_memory(void *p, size_t length, const pa_sample_spec *spec) {
return p;
}
+#define VOLUME_PADDING 32
+
static void calc_linear_integer_volume(int32_t linear[], const pa_cvolume *volume) {
- unsigned channel;
+ unsigned channel, nchannels, padding;
pa_assert(linear);
pa_assert(volume);
- for (channel = 0; channel < volume->channels; channel++)
+ nchannels = volume->channels;
+
+ for (channel = 0; channel < nchannels; channel++)
linear[channel] = (int32_t) lrint(pa_sw_volume_to_linear(volume->values[channel]) * 0x10000);
+
+ for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
+ linear[channel] = linear[padding];
}
static void calc_linear_float_volume(float linear[], const pa_cvolume *volume) {
- unsigned channel;
+ unsigned channel, nchannels, padding;
pa_assert(linear);
pa_assert(volume);
- for (channel = 0; channel < volume->channels; channel++)
+ nchannels = volume->channels;
+
+ for (channel = 0; channel < nchannels; channel++)
linear[channel] = (float) pa_sw_volume_to_linear(volume->values[channel]);
+
+ for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
+ linear[channel] = linear[padding];
}
static void calc_linear_integer_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {
@@ -716,7 +728,7 @@ void pa_volume_memchunk(
const pa_cvolume *volume) {
void *ptr;
- volume_val linear[PA_CHANNELS_MAX];
+ volume_val linear[PA_CHANNELS_MAX + VOLUME_PADDING];
pa_do_volume_func_t do_volume;
pa_assert(c);
commit 3a0b012ee016e2fe40f49c72da119cb89d2ba312
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 12 17:08:41 2009 +0200
volume: add first mmx optimized function
Add an MMX-optimized version of s16ne volume scaling. Install it as the
PA_SAMPLE_S16NE volume function from pa_volume_func_init_mmx(), called in main().
diff --git a/src/Makefile.am b/src/Makefile.am
index fc5d39f..e7a9900 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -826,6 +826,7 @@ libpulsecore_ at PA_MAJORMINORMICRO@_la_SOURCES = \
pulsecore/rtpoll.c pulsecore/rtpoll.h \
pulsecore/sample-util.c pulsecore/sample-util.h \
pulsecore/svolume_c.c \
+ pulsecore/svolume_mmx.c \
pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \
pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \
pulsecore/sconv.c pulsecore/sconv.h \
diff --git a/src/daemon/main.c b/src/daemon/main.c
index 8521e72..e3c395f 100644
--- a/src/daemon/main.c
+++ b/src/daemon/main.c
@@ -821,6 +821,8 @@ int main(int argc, char *argv[]) {
pa_memtrap_install();
+ pa_volume_func_init_mmx();
+
pa_assert_se(mainloop = pa_mainloop_new());
if (!(c = pa_core_new(pa_mainloop_get_api(mainloop), !conf->disable_shm, conf->shm_size))) {
diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h
index 278b88b..00b9ae0 100644
--- a/src/pulsecore/sample-util.h
+++ b/src/pulsecore/sample-util.h
@@ -91,6 +91,7 @@ typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned chan
pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f);
void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func);
+void pa_volume_func_init_mmx(void);
#define PA_CHANNEL_POSITION_MASK_LEFT \
(PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
new file mode 100644
index 0000000..9f49a62
--- /dev/null
+++ b/src/pulsecore/svolume_mmx.c
@@ -0,0 +1,424 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <alloca.h>
+
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+#if 0
+static void
+pa_volume_u8_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) *samples - 0x80;
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
+ *samples++ = (uint8_t) (t + 0x80);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_alaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) st_alaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) st_ulaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+#endif
+
+static void
+pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ int64_t channel, temp;
+
+ /* the max number of samples we process at a time */
+ channels = MAX (4, channels);
+
+#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \
+ " pxor %%mm4, %%mm4 \n\t" \
+ " punpcklwd %%mm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \
+ " pcmpgtw "#s", %%mm4 \n\t" /* select sign from sample */ \
+ " pand "#v", %%mm4 \n\t" /* extract correction factors */ \
+ " movq "#s", %%mm5 \n\t" \
+ " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \
+ " psubd %%mm4, "#s" \n\t" /* sign correction */ \
+ " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \
+ " pmaddwd %%mm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \
+ " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \
+ " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */
+
+#define MOD_ADD(a,b) \
+ " add "#a", %3 \n\t" \
+ " mov %3, %4 \n\t" \
+ " sub %5, %4 \n\t" \
+ " cmp %3, "#b" \n\t" \
+ " cmovae %4, %3 \n\t"
+
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
+
+ " movd (%1, %3, 4), %%mm0 \n\t" /* do odd samples */
+ " movw (%0), %%ax \n\t"
+ " movd %%eax, %%mm1 \n\t"
+ VOLUME_32x16 (%%mm1, %%mm0)
+ " movd %%mm0, %%eax \n\t"
+ " movw %%ax, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
+ " dec %2 \n\t"
+
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in pairs of 2 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
+ " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */
+ VOLUME_32x16 (%%mm1, %%mm0)
+ " movd %%mm0, (%0) \n\t"
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+ " dec %2 \n\t"
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 6f \n\t"
+
+ "5: \n\t" /* do samples in pairs of 4 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
+ " movq 8(%1, %3, 4), %%mm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
+ " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */
+ " movd 4(%0), %%mm3 \n\t" /* X | X | p3 | p2 */
+ VOLUME_32x16 (%%mm1, %%mm0)
+ VOLUME_32x16 (%%mm3, %%mm2)
+ " movd %%mm0, (%0) \n\t"
+ " movd %%mm2, 4(%0) \n\t"
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+ " dec %2 \n\t"
+ " jne 5b \n\t"
+
+ "6: \n\t"
+ " emms \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=r" (temp)
+ : "r" ((int64_t)channels)
+ : "rax", "cc"
+ );
+}
+
+#if 0
+static void
+pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (int16_t);
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+
+ t = (int32_t) PA_INT16_SWAP(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = PA_INT16_SWAP((int16_t) t);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_float32ne_mmx (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (float);
+
+ for (channel = 0; length; length--) {
+ *samples++ *= volumes[channel];
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_float32re_mmx (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (float);
+
+ for (channel = 0; length; length--) {
+ float t;
+
+ t = PA_FLOAT32_SWAP(*samples);
+ t *= volumes[channel];
+ *samples++ = PA_FLOAT32_SWAP(t);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s32ne_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (int32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t)(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = (int32_t) t;
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s32re_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (int32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t) PA_INT32_SWAP(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_INT32_SWAP((int32_t) t);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24ne_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+ uint8_t *e;
+
+ e = samples + length;
+
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
+
+ t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24re_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+ uint8_t *e;
+
+ e = samples + length;
+
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
+
+ t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24_32ne_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (uint32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t) ((int32_t) (*samples << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ unsigned channel;
+
+ length /= sizeof (uint32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+
+ t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
+
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+#endif
+
+#undef RUN_TEST
+
+#ifdef RUN_TEST
+#define CHANNELS 1
+#define SAMPLES 1021
+#define TIMES 1000
+
+static void run_test (void) {
+ int16_t samples[SAMPLES];
+ int16_t samples_ref[SAMPLES];
+ int16_t samples_orig[SAMPLES];
+ int32_t volumes[CHANNELS];
+ int i, j;
+ pa_do_volume_func_t func;
+
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+ printf ("checking\n");
+
+ for (j = 0; j < TIMES; j++) {
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
+
+ for (i = 0; i < CHANNELS; i++) {
+ volumes[i] = rand() >> 15;
+ }
+
+ pa_volume_s16ne_mmx (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
+ func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
+
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+ }
+#if 0
+ else
+ printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+#endif
+ }
+ }
+}
+#endif
+
+void pa_volume_func_init_mmx (void) {
+ pa_log_info("Initialising MMX optimized functions.");
+
+#ifdef RUN_TEST
+ run_test ();
+#endif
+
+ pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
+}
commit 08f3e16c84fabca9c6789440f98ff8dca62eb81a
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 12 20:43:37 2009 +0200
volume_mmx: fix mmx code a bit
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index 9f49a62..6dcc26c 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -104,14 +104,15 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
{
int64_t channel, temp;
- /* the max number of samples we process at a time */
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
channels = MAX (4, channels);
#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \
" pxor %%mm4, %%mm4 \n\t" \
" punpcklwd %%mm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \
" pcmpgtw "#s", %%mm4 \n\t" /* select sign from sample */ \
- " pand "#v", %%mm4 \n\t" /* extract correction factors */ \
+ " pand "#v", %%mm4 \n\t" /* extract sign correction factors */ \
" movq "#s", %%mm5 \n\t" \
" pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \
" psubd %%mm4, "#s" \n\t" /* sign correction */ \
@@ -123,8 +124,8 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#define MOD_ADD(a,b) \
" add "#a", %3 \n\t" \
" mov %3, %4 \n\t" \
- " sub %5, %4 \n\t" \
- " cmp %3, "#b" \n\t" \
+ " sub "#b", %4 \n\t" \
+ " cmp "#b", %3 \n\t" \
" cmovae %4, %3 \n\t"
__asm__ __volatile__ (
@@ -135,14 +136,13 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" je 2f \n\t"
" movd (%1, %3, 4), %%mm0 \n\t" /* do odd samples */
- " movw (%0), %%ax \n\t"
- " movd %%eax, %%mm1 \n\t"
+ " movw (%0), %4 \n\t"
+ " movd %4, %%mm1 \n\t"
VOLUME_32x16 (%%mm1, %%mm0)
- " movd %%mm0, %%eax \n\t"
- " movw %%ax, (%0) \n\t"
+ " movd %%mm0, %4 \n\t"
+ " movw %4, (%0) \n\t"
" add $2, %0 \n\t"
MOD_ADD ($1, %5)
- " dec %2 \n\t"
"2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
@@ -156,7 +156,6 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" movd %%mm0, (%0) \n\t"
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
- " dec %2 \n\t"
"4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
@@ -180,9 +179,9 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"6: \n\t"
" emms \n\t"
- : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=r" (temp)
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp)
: "r" ((int64_t)channels)
- : "rax", "cc"
+ : "cc"
);
}
@@ -370,7 +369,7 @@ pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels,
#undef RUN_TEST
#ifdef RUN_TEST
-#define CHANNELS 1
+#define CHANNELS 2
#define SAMPLES 1021
#define TIMES 1000
@@ -378,25 +377,32 @@ static void run_test (void) {
int16_t samples[SAMPLES];
int16_t samples_ref[SAMPLES];
int16_t samples_orig[SAMPLES];
- int32_t volumes[CHANNELS];
- int i, j;
+ int32_t volumes[CHANNELS + 16];
+ int i, j, padding;
pa_do_volume_func_t func;
func = pa_get_volume_func (PA_SAMPLE_S16NE);
- printf ("checking\n");
+ printf ("checking %d\n", sizeof (samples));
for (j = 0; j < TIMES; j++) {
+ /*
+ for (i = 0; i < SAMPLES; i++) {
+ samples[i] samples_ref[i] = samples_orig[i] = rand() >> 16;
+ }
+ */
+
pa_random (samples, sizeof (samples));
memcpy (samples_ref, samples, sizeof (samples));
memcpy (samples_orig, samples, sizeof (samples));
- for (i = 0; i < CHANNELS; i++) {
+ for (i = 0; i < CHANNELS; i++)
volumes[i] = rand() >> 15;
- }
+ for (padding = 0; padding < 16; padding++, i++)
+ volumes[i] = volumes[padding];
- pa_volume_s16ne_mmx (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
- func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
+ pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
for (i = 0; i < SAMPLES; i++) {
if (samples[i] != samples_ref[i]) {
commit 7086784573e9e6c92d4c34404f18891c2d19872a
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 12 20:44:12 2009 +0200
volume_sse: add sse optimisations
diff --git a/src/Makefile.am b/src/Makefile.am
index e7a9900..b692e4a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -826,7 +826,7 @@ libpulsecore_ at PA_MAJORMINORMICRO@_la_SOURCES = \
pulsecore/rtpoll.c pulsecore/rtpoll.h \
pulsecore/sample-util.c pulsecore/sample-util.h \
pulsecore/svolume_c.c \
- pulsecore/svolume_mmx.c \
+ pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \
pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \
pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \
pulsecore/sconv.c pulsecore/sconv.h \
diff --git a/src/daemon/main.c b/src/daemon/main.c
index e3c395f..3c5f7f9 100644
--- a/src/daemon/main.c
+++ b/src/daemon/main.c
@@ -822,6 +822,7 @@ int main(int argc, char *argv[]) {
pa_memtrap_install();
pa_volume_func_init_mmx();
+ pa_volume_func_init_sse();
pa_assert_se(mainloop = pa_mainloop_new());
diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h
index 00b9ae0..563dbb6 100644
--- a/src/pulsecore/sample-util.h
+++ b/src/pulsecore/sample-util.h
@@ -92,6 +92,7 @@ pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f);
void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func);
void pa_volume_func_init_mmx(void);
+void pa_volume_func_init_sse(void);
#define PA_CHANNEL_POSITION_MASK_LEFT \
(PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
new file mode 100644
index 0000000..2d4c541
--- /dev/null
+++ b/src/pulsecore/svolume_sse.c
@@ -0,0 +1,437 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <alloca.h>
+
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+#if 0
+static void
+pa_volume_u8_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C (no SSE instructions) scaler for unsigned 8-bit samples; it sits
+ * inside an #if 0 region, so it is not compiled.
+ * samples: buffer scaled in place, length: number of one-byte samples.
+ * volumes: one factor per channel, cycled through with a wrapping index;
+ * the hi/lo split below treats each factor as fixed point with 16
+ * fractional bits. channels: number of factors to cycle through. */
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+ /* Remove the 0x80 bias, scale, clamp to -0x80..0x7F, restore the bias. */
+ t = (int32_t) *samples - 0x80;
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
+ *samples++ = (uint8_t) (t + 0x80);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_alaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for one-byte A-law samples; inside an #if 0 region, so not
+ * compiled. samples is scaled in place, length counts samples, volumes holds
+ * one fixed-point factor (16 fractional bits) per channel.
+ * st_alaw2linear16 and st_13linear2alaw are defined elsewhere; presumably
+ * A-law decode and encode - confirm. */
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+ /* Expand to 16 bit, scale, clamp to the int16 range, drop the 3 low bits
+ * and hand the result to the 13-bit encoder. */
+ t = (int32_t) st_alaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for one-byte mu-law samples; inside an #if 0 region, so not
+ * compiled. samples is scaled in place, length counts samples, volumes holds
+ * one fixed-point factor (16 fractional bits) per channel.
+ * st_ulaw2linear16 and st_14linear2ulaw are defined elsewhere; presumably
+ * mu-law decode and encode - confirm. */
+ unsigned channel;
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+ /* Expand to 16 bit, scale, clamp to the int16 range, drop the 2 low bits
+ * and hand the result to the 14-bit encoder. */
+ t = (int32_t) st_ulaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+#endif
+
+static void
+pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Scales native-endian signed 16-bit samples in place using inline assembly
+ * on the xmm registers.
+ * samples: buffer to scale; length: its size in bytes (the asm halves it to
+ * get a sample count). volumes: per-channel factors with 16 fractional bits,
+ * read at index channel. channels: raised to at least 8 below and then used
+ * as the wrap-around limit of the channel index.
+ * Flow of the asm: the low three bits of the sample count select a single
+ * sample, a group of 2 and a group of 4; the rest is done in a loop over
+ * groups of 8.
+ * NOTE(review): for a real channel count below 8 the index wraps at 8; if that
+ * count does not divide 8 the volumes look misaligned after the first wrap -
+ * confirm against how callers pad the volume array. */
+ int64_t channel;
+ int64_t temp;
+
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
+ channels = MAX (8, channels);
+
+#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \
+ " pxor %%xmm4, %%xmm4 \n\t" \
+ " punpcklwd %%xmm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \
+ " pcmpgtw "#s", %%xmm4 \n\t" /* select sign from sample */ \
+ " pand "#v", %%xmm4 \n\t" /* extract sign correction factors */ \
+ " movdqa "#s", %%xmm5 \n\t" \
+ " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \
+ " psubd %%xmm4, "#s" \n\t" /* sign correction */ \
+ " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \
+ " pmaddwd %%xmm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \
+ " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \
+ " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */
+
+#define MOD_ADD(a,b) \
+ " add "#a", %3 \n\t" \
+ " mov %3, %4 \n\t" \
+ " sub "#b", %4 \n\t" \
+ " cmp "#b", %3 \n\t" \
+ " cmovae %4, %3 \n\t"
+
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
+
+ " movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */
+ " movw (%0), %4 \n\t"
+ " movd %4, %%xmm1 \n\t"
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movd %%xmm0, %4 \n\t"
+ " movw %4, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
+
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in pairs of 2 */
+ " movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
+ " movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movd %%xmm0, (%0) \n\t"
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 6f \n\t"
+ /* NOTE(review): movdqa requires a 16-byte aligned memory operand; the address
+ * volumes + 4 * channel used here and at label 7 is not shown to be aligned
+ * (channel may already be 1, 2 or 3) - confirm, or an unaligned load is needed. */
+ "5: \n\t" /* do samples in pairs of 4 */
+ " movdqa (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
+ " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movq %%xmm0, (%0) \n\t"
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+
+ "6: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 8f \n\t"
+
+ "7: \n\t" /* do samples in pairs of 8 */
+ " movdqa (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
+ " movdqa 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
+ " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ " movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ VOLUME_32x16 (%%xmm3, %%xmm2)
+ " movq %%xmm0, (%0) \n\t"
+ " movq %%xmm2, 8(%0) \n\t"
+ " add $16, %0 \n\t"
+ MOD_ADD ($8, %5)
+ " dec %2 \n\t"
+ " jne 7b \n\t"
+ "8: \n\t"
+ /* Operands: %0 samples, %1 volumes, %2 length, %3 channel, %4 temp, %5 channels.
+ * NOTE(review): only cc is listed as clobbered although the asm writes
+ * xmm0-xmm5 and stores through %0 - confirm whether more clobbers are needed. */
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
+ : "r" ((int64_t)channels)
+ : "cc"
+ );
+}
+
+#if 0
+static void
+pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for signed 16-bit samples that are passed through
+ * PA_INT16_SWAP on load and store (defined elsewhere; presumably a byte swap
+ * for reverse-endian data - confirm). Inside an #if 0 region, so not compiled.
+ * samples is scaled in place; length is a byte count, converted to a sample
+ * count below; volumes holds one fixed-point factor (16 fractional bits) per
+ * channel. */
+ unsigned channel;
+
+ length /= sizeof (int16_t);
+
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
+
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
+ /* Swap in, scale, clamp to the int16 range, swap back out. */
+ t = (int32_t) PA_INT16_SWAP(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = PA_INT16_SWAP((int16_t) t);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_float32ne_sse (float *samples, float *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for float samples; inside an #if 0 region, so not compiled.
+ * Multiplies each sample in place by the float factor of its channel.
+ * length is a byte count, converted to a sample count below. */
+ unsigned channel;
+
+ length /= sizeof (float);
+
+ for (channel = 0; length; length--) {
+ *samples++ *= volumes[channel];
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_float32re_sse (float *samples, float *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for float samples that are passed through PA_FLOAT32_SWAP
+ * on load and store (defined elsewhere; presumably a byte swap for
+ * reverse-endian data - confirm). Inside an #if 0 region, so not compiled.
+ * length is a byte count, converted to a sample count below. */
+ unsigned channel;
+
+ length /= sizeof (float);
+
+ for (channel = 0; length; length--) {
+ float t;
+ /* Swap in, multiply by the channel factor, swap back out. */
+ t = PA_FLOAT32_SWAP(*samples);
+ t *= volumes[channel];
+ *samples++ = PA_FLOAT32_SWAP(t);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s32ne_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for signed 32-bit samples; inside an #if 0 region, so not
+ * compiled. samples is scaled in place; length is a byte count, converted to
+ * a sample count below; volumes holds one factor per channel, applied as
+ * fixed point with 16 fractional bits. */
+ unsigned channel;
+
+ length /= sizeof (int32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+ /* Multiply in 64 bit, drop the 16 fractional bits, clamp to the int32 range. */
+ t = (int64_t)(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = (int32_t) t;
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s32re_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for signed 32-bit samples that are passed through
+ * PA_INT32_SWAP on load and store (defined elsewhere; presumably a byte swap
+ * for reverse-endian data - confirm). Inside an #if 0 region, so not compiled.
+ * length is a byte count, converted to a sample count below. */
+ unsigned channel;
+
+ length /= sizeof (int32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+ /* Swap in, multiply in 64 bit, drop the 16 fractional bits, clamp, swap out. */
+ t = (int64_t) PA_INT32_SWAP(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_INT32_SWAP((int32_t) t);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24ne_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for samples stored in 3 bytes each; inside an #if 0 region,
+ * so not compiled. samples is scaled in place; length is a byte count (the
+ * loop runs until samples + length, 3 bytes per step).
+ * PA_READ24NE and PA_WRITE24NE are defined elsewhere; presumably native-endian
+ * 24-bit load and store - confirm. */
+ unsigned channel;
+ uint8_t *e;
+
+ e = samples + length;
+
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
+ /* Shift the value into the top 24 bits of an int32 so its sign is kept,
+ * scale with 16 fractional bits, clamp, then shift back down to 24 bits. */
+ t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24re_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for samples stored in 3 bytes each, accessed through
+ * PA_READ24RE and PA_WRITE24RE (defined elsewhere; presumably reverse-endian
+ * 24-bit load and store - confirm). Inside an #if 0 region, so not compiled.
+ * length is a byte count (the loop runs until samples + length, 3 bytes per
+ * step). */
+ unsigned channel;
+ uint8_t *e;
+
+ e = samples + length;
+
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
+ /* Shift the value into the top 24 bits of an int32 so its sign is kept,
+ * scale with 16 fractional bits, clamp, then shift back down to 24 bits. */
+ t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24_32ne_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for samples held in 32-bit words of which the low 24 bits
+ * are used (the code shifts left by 8 before scaling and right by 8 after).
+ * Inside an #if 0 region, so not compiled. samples is scaled in place;
+ * length is a byte count, converted to a sample count below. */
+ unsigned channel;
+
+ length /= sizeof (uint32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+ /* Move the value to the top of an int32 so its sign is kept, scale with
+ * 16 fractional bits, clamp, then move it back to the low 24 bits. */
+ t = (int64_t) ((int32_t) (*samples << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+
+static void
+pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* Plain C scaler for 32-bit words carrying 24-bit samples, passed through
+ * PA_UINT32_SWAP on load and store (defined elsewhere; presumably a byte swap
+ * for reverse-endian data - confirm). Inside an #if 0 region, so not compiled.
+ * length is a byte count, converted to a sample count below. */
+ unsigned channel;
+
+ length /= sizeof (uint32_t);
+
+ for (channel = 0; length; length--) {
+ int64_t t;
+ /* Swap in, move the value to the top of an int32, scale with 16 fractional
+ * bits, clamp, move back to the low 24 bits and swap out. */
+ t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
+ /* Step to the next channel, wrapping back to the first. */
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
+}
+#endif
+
+#undef RUN_TEST
+
+#ifdef RUN_TEST
+#define CHANNELS 2
+#define SAMPLES 1021
+#define TIMES 1000
+
+static void run_test (void) { /* Self-check, only built when RUN_TEST is defined.
+ * Fills a buffer with random samples TIMES times, scales one copy with
+ * pa_volume_s16ne_sse and another with the function returned by
+ * pa_get_volume_func (PA_SAMPLE_S16NE), and prints every sample that differs.
+ * volumes carries 16 extra entries that repeat the per-channel values, so the
+ * routine under test may read past the first CHANNELS entries.
+ * NOTE(review): the first printf passes a size_t (sizeof) to %d; the types do
+ * not match.
+ * NOTE(review): samples are int16_t but printed with %04x; after promotion to
+ * int a negative sample is sign-extended, so more than four digits can appear. */
+ int16_t samples[SAMPLES];
+ int16_t samples_ref[SAMPLES];
+ int16_t samples_orig[SAMPLES];
+ int32_t volumes[CHANNELS + 16];
+ int i, j, padding;
+ pa_do_volume_func_t func;
+ /* Reference: whatever is registered for S16NE at this point. */
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+ printf ("checking %d\n", sizeof (samples));
+
+ for (j = 0; j < TIMES; j++) {
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
+ /* One pseudo-random volume per channel. */
+ for (i = 0; i < CHANNELS; i++)
+ volumes[i] = rand() >> 15;
+ /* i continues from CHANNELS: fill the 16 padding slots by copying entries
+ * from the start of the array, which repeats the per-channel pattern. */
+ for (padding = 0; padding < 16; padding++, i++)
+ volumes[i] = volumes[padding];
+ /* Run both implementations on identical input. */
+ pa_volume_s16ne_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
+ func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
+ /* Report index, both results, the input sample and the volume on mismatch. */
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+ }
+#if 0
+ else
+ printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+#endif
+ }
+ }
+}
+#endif
+
+void pa_volume_func_init_sse (void) { /* Logs a message, runs the self-check
+ * when RUN_TEST is defined, then registers pa_volume_s16ne_sse as the volume
+ * function for PA_SAMPLE_S16NE.
+ * NOTE(review): no CPU feature check is visible in this function; confirm
+ * that it is only reached on CPUs that support the instructions used. */
+ pa_log_info("Initialising SSE optimized functions.");
+
+#ifdef RUN_TEST
+ run_test ();
+#endif
+ /* Replace the S16NE volume function with the SSE implementation. */
+ pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
+}
commit 5998cf99b08d448dd5158ed6229262aa67ea4a66
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 13 13:45:01 2009 +0200
svolume: improve SSE and MMX code
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index 6dcc26c..3c22945 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -99,6 +99,46 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig
}
#endif
+#define VOLUME_32x16(s,v) /* v = .. | vh | vl |, s = packed 16-bit samples; reads mm6 and mm7, uses mm4 and mm5 as scratch */ \
+ " pxor %%mm4, %%mm4 \n\t" /* .. | 0 | 0 | */ \
+ " punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \
+ " pcmpgtw "#v", %%mm4 \n\t" /* .. | 0 | s(vl) | */ \
+ " pand "#s", %%mm4 \n\t" /* .. | 0 | (p0) | (vl >> 15) & p */ \
+ " movq %%mm6, %%mm5 \n\t" /* .. | ffff | 0 | */ \
+ " pand "#v", %%mm5 \n\t" /* .. | vh | 0 | */ \
+ " por %%mm5, %%mm4 \n\t" /* .. | vh | (p0) | */ \
+ " pmulhw "#s", "#v" \n\t" /* .. | 0 | vl*p0 | high 16 bits of the signed product */ \
+ " paddw %%mm4, "#v" \n\t" /* .. | vh | vl*p0 | vh + sign correct */ \
+ " pslld $16, "#s" \n\t" /* .. | p0 | 0 | */ \
+ " por %%mm7, "#s" \n\t" /* .. | p0 | 1 | */ \
+ " pmaddwd "#s", "#v" \n\t" /* .. | p0 * v0 | = p0*vh + 1*(corrected vl*p0) */ \
+ " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | saturated to 16 bit */
+
+#define MOD_ADD(a,b) /* advance index %3 by a; subtract b once if it reached b; %4 is scratch */ \
+ " add "#a", %3 \n\t" /* channel += inc */ \
+ " mov %3, %4 \n\t" \
+ " sub "#b", %4 \n\t" /* tmp = channel - channels */ \
+ " cmp "#b", %3 \n\t" /* if (channel >= channels) */ \
+ " cmovae %4, %3 \n\t" /* channel = tmp */
+
+/* swap the two bytes inside every 16-bit word of s; uses mm4 as scratch */
+#define SWAP_16(s) \
+ " movq "#s", %%mm4 \n\t" /* .. | h l | */ \
+ " psrlw $8, %%mm4 \n\t" /* .. | 0 h | */ \
+ " psllw $8, "#s" \n\t" /* .. | l 0 | */ \
+ " por %%mm4, "#s" \n\t" /* .. | l h | */
+
+/* swap 2 registers 16 bits for better pairing: same byte swap as SWAP_16 but
+ * applied to s1 and s2 with the two instruction streams interleaved; uses mm4
+ * and mm5 as scratch */
+#define SWAP_16_2(s1,s2) \
+ " movq "#s1", %%mm4 \n\t" /* .. | h l | */ \
+ " movq "#s2", %%mm5 \n\t" \
+ " psrlw $8, %%mm4 \n\t" /* .. | 0 h | */ \
+ " psrlw $8, %%mm5 \n\t" \
+ " psllw $8, "#s1" \n\t" /* .. | l 0 | */ \
+ " psllw $8, "#s2" \n\t" \
+ " por %%mm4, "#s1" \n\t" /* .. | l h | */ \
+ " por %%mm5, "#s2" \n\t"
+
static void
pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
@@ -108,38 +148,22 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
* we overread the volume array, which should have enough padding. */
channels = MAX (4, channels);
-#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \
- " pxor %%mm4, %%mm4 \n\t" \
- " punpcklwd %%mm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \
- " pcmpgtw "#s", %%mm4 \n\t" /* select sign from sample */ \
- " pand "#v", %%mm4 \n\t" /* extract sign correction factors */ \
- " movq "#s", %%mm5 \n\t" \
- " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \
- " psubd %%mm4, "#s" \n\t" /* sign correction */ \
- " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \
- " pmaddwd %%mm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \
- " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \
- " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */
-
-#define MOD_ADD(a,b) \
- " add "#a", %3 \n\t" \
- " mov %3, %4 \n\t" \
- " sub "#b", %4 \n\t" \
- " cmp "#b", %3 \n\t" \
- " cmovae %4, %3 \n\t"
-
__asm__ __volatile__ (
" xor %3, %3 \n\t"
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+ " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
+ " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
+ " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
+ " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
" test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t"
- " movd (%1, %3, 4), %%mm0 \n\t" /* do odd samples */
- " movw (%0), %4 \n\t"
+ " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %4 \n\t" /* .. | p0 | */
" movd %4, %%mm1 \n\t"
VOLUME_32x16 (%%mm1, %%mm0)
- " movd %%mm0, %4 \n\t"
+ " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
" movw %4, (%0) \n\t"
" add $2, %0 \n\t"
MOD_ADD ($1, %5)
@@ -149,11 +173,11 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" test $1, %2 \n\t" /* check for odd samples */
" je 4f \n\t"
- "3: \n\t" /* do samples in pairs of 2 */
- " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
VOLUME_32x16 (%%mm1, %%mm0)
- " movd %%mm0, (%0) \n\t"
+ " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
@@ -162,15 +186,15 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" cmp $0, %2 \n\t"
" je 6f \n\t"
- "5: \n\t" /* do samples in pairs of 4 */
- " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movq 8(%1, %3, 4), %%mm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
- " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */
- " movd 4(%0), %%mm3 \n\t" /* X | X | p3 | p2 */
+ "5: \n\t" /* do samples in groups of 4 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
+ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
VOLUME_32x16 (%%mm1, %%mm0)
VOLUME_32x16 (%%mm3, %%mm2)
- " movd %%mm0, (%0) \n\t"
- " movd %%mm2, 4(%0) \n\t"
+ " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
+ " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
" dec %2 \n\t"
@@ -185,30 +209,83 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
);
}
-#if 0
static void
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ int64_t channel, temp;
- length /= sizeof (int16_t);
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
+ channels = MAX (4, channels);
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+ " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
+ " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
+ " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
+ " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
- t = (int32_t) PA_INT16_SWAP(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = PA_INT16_SWAP((int16_t) t);
+ " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %4 \n\t" /* .. | p0 | */
+ " rorw $8, %4 \n\t"
+ " movd %4, %%mm1 \n\t"
+ VOLUME_32x16 (%%mm1, %%mm0)
+ " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
+ " rorw $8, %4 \n\t"
+ " movw %4, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
+ SWAP_16 (%%mm1)
+ VOLUME_32x16 (%%mm1, %%mm0)
+ SWAP_16 (%%mm0)
+ " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 6f \n\t"
+
+ "5: \n\t" /* do samples in groups of 4 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
+ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
+ SWAP_16_2 (%%mm1, %%mm3)
+ VOLUME_32x16 (%%mm1, %%mm0)
+ VOLUME_32x16 (%%mm3, %%mm2)
+ SWAP_16_2 (%%mm0, %%mm2)
+ " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
+ " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+ " dec %2 \n\t"
+ " jne 5b \n\t"
+
+ "6: \n\t"
+ " emms \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp)
+ : "r" ((int64_t)channels)
+ : "cc"
+ );
}
+#if 0
static void
pa_volume_float32ne_mmx (float *samples, float *volumes, unsigned channels, unsigned length)
{
@@ -366,42 +443,37 @@ pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
-#undef RUN_TEST
+#define RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2
#define SAMPLES 1021
#define TIMES 1000
+#define PADDING 16
static void run_test (void) {
int16_t samples[SAMPLES];
int16_t samples_ref[SAMPLES];
int16_t samples_orig[SAMPLES];
- int32_t volumes[CHANNELS + 16];
+ int32_t volumes[CHANNELS + PADDING];
int i, j, padding;
pa_do_volume_func_t func;
- func = pa_get_volume_func (PA_SAMPLE_S16NE);
+ func = pa_get_volume_func (PA_SAMPLE_S16RE);
- printf ("checking %d\n", sizeof (samples));
+ printf ("checking MMX %d\n", sizeof (samples));
for (j = 0; j < TIMES; j++) {
- /*
- for (i = 0; i < SAMPLES; i++) {
- samples[i] samples_ref[i] = samples_orig[i] = rand() >> 16;
- }
- */
-
pa_random (samples, sizeof (samples));
memcpy (samples_ref, samples, sizeof (samples));
memcpy (samples_orig, samples, sizeof (samples));
for (i = 0; i < CHANNELS; i++)
- volumes[i] = rand() >> 15;
- for (padding = 0; padding < 16; padding++, i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
volumes[i] = volumes[padding];
- pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+ pa_volume_s16re_mmx (samples, volumes, CHANNELS, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
for (i = 0; i < SAMPLES; i++) {
@@ -409,11 +481,6 @@ static void run_test (void) {
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
samples_orig[i], volumes[i % CHANNELS]);
}
-#if 0
- else
- printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
- samples_orig[i], volumes[i % CHANNELS]);
-#endif
}
}
}
@@ -427,4 +494,5 @@ void pa_volume_func_init_mmx (void) {
#endif
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
+ pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
}
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index 2d4c541..ff583a0 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -99,6 +99,44 @@ pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsig
}
#endif
+#define VOLUME_32x16(s,v) /* v = .. | vh | vl |, s = packed 16-bit samples; uses xmm4 and xmm5 as scratch */ \
+ " pxor %%xmm4, %%xmm4 \n\t" /* .. | 0 | 0 | */ \
+ " punpcklwd %%xmm4, "#s" \n\t" /* .. | 0 | p0 | */ \
+ " pcmpgtw "#s", %%xmm4 \n\t" /* .. | 0 | s(p0) | */ \
+ " pand "#v", %%xmm4 \n\t" /* .. | 0 | (vl) | */ \
+ " movdqa "#s", %%xmm5 \n\t" /* keep .. | 0 | p0 | for the pmaddwd below */ \
+ " pmulhuw "#v", "#s" \n\t" /* .. | 0 | vl*p0 | */ \
+ " psubd %%xmm4, "#s" \n\t" /* .. | 0 | vl*p0 | + sign correct */ \
+ " psrld $16, "#v" \n\t" /* .. | 0 | vh | */ \
+ " pmaddwd %%xmm5, "#v" \n\t" /* .. | p0 * vh | */ \
+ " paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \
+ " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */
+
+#define MOD_ADD(a,b) /* advance index %3 by a; subtract b once if it reached b; %4 is scratch */ \
+ " add "#a", %3 \n\t" /* channel += inc */ \
+ " mov %3, %4 \n\t" \
+ " sub "#b", %4 \n\t" /* tmp = channel - channels */ \
+ " cmp "#b", %3 \n\t" /* if (channel >= channels) */ \
+ " cmovae %4, %3 \n\t" /* channel = tmp */
+
+/* swap the two bytes inside every 16-bit word of s; uses xmm4 as scratch */
+#define SWAP_16(s) \
+ " movdqa "#s", %%xmm4 \n\t" /* .. | h l | */ \
+ " psrlw $8, %%xmm4 \n\t" /* .. | 0 h | */ \
+ " psllw $8, "#s" \n\t" /* .. | l 0 | */ \
+ " por %%xmm4, "#s" \n\t" /* .. | l h | */
+
+/* swap 2 registers 16 bits for better pairing: same byte swap as SWAP_16 but
+ * applied to s1 and s2 with the two instruction streams interleaved; uses xmm4
+ * and xmm5 as scratch */
+#define SWAP_16_2(s1,s2) \
+ " movdqa "#s1", %%xmm4 \n\t" /* .. | h l | */ \
+ " movdqa "#s2", %%xmm5 \n\t" \
+ " psrlw $8, %%xmm4 \n\t" /* .. | 0 h | */ \
+ " psrlw $8, %%xmm5 \n\t" \
+ " psllw $8, "#s1" \n\t" /* .. | l 0 | */ \
+ " psllw $8, "#s2" \n\t" \
+ " por %%xmm4, "#s1" \n\t" /* .. | l h | */ \
+ " por %%xmm5, "#s2" \n\t"
+
static void
pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
@@ -109,25 +147,83 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
* we overread the volume array, which should have enough padding. */
channels = MAX (8, channels);
-#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \
- " pxor %%xmm4, %%xmm4 \n\t" \
- " punpcklwd %%xmm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \
- " pcmpgtw "#s", %%xmm4 \n\t" /* select sign from sample */ \
- " pand "#v", %%xmm4 \n\t" /* extract sign correction factors */ \
- " movdqa "#s", %%xmm5 \n\t" \
- " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \
- " psubd %%xmm4, "#s" \n\t" /* sign correction */ \
- " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \
- " pmaddwd %%xmm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \
- " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \
- " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
-#define MOD_ADD(a,b) \
- " add "#a", %3 \n\t" \
- " mov %3, %4 \n\t" \
- " sub "#b", %4 \n\t" \
- " cmp "#b", %3 \n\t" \
- " cmovae %4, %3 \n\t"
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
+
+ " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %4 \n\t" /* .. | p0 | */
+ " movd %4, %%xmm1 \n\t"
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
+ " movw %4, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
+
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t"
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " test $1, %2 \n\t"
+ " je 6f \n\t"
+
+ "5: \n\t" /* do samples in groups of 4 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+
+ "6: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 8f \n\t"
+
+ "7: \n\t" /* do samples in groups of 8 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ VOLUME_32x16 (%%xmm3, %%xmm2)
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
+ " add $16, %0 \n\t"
+ MOD_ADD ($8, %5)
+ " dec %2 \n\t"
+ " jne 7b \n\t"
+ "8: \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
+ : "r" ((int64_t)channels)
+ : "cc"
+ );
+}
+
+static void
+pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+ int64_t channel;
+ int64_t temp;
+
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
+ channels = MAX (8, channels);
__asm__ __volatile__ (
" xor %3, %3 \n\t"
@@ -138,9 +234,11 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */
" movw (%0), %4 \n\t"
+ " rorw $8, %4 \n\t"
" movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0)
" movd %%xmm0, %4 \n\t"
+ " rorw $8, %4 \n\t"
" movw %4, (%0) \n\t"
" add $2, %0 \n\t"
MOD_ADD ($1, %5)
@@ -153,7 +251,9 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"3: \n\t" /* do samples in pairs of 2 */
" movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
" movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
+ SWAP_16 (%%xmm0)
" movd %%xmm0, (%0) \n\t"
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
@@ -164,9 +264,11 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" je 6f \n\t"
"5: \n\t" /* do samples in pairs of 4 */
- " movdqa (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
" movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
+ SWAP_16 (%%xmm0)
" movq %%xmm0, (%0) \n\t"
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
@@ -177,12 +279,14 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" je 8f \n\t"
"7: \n\t" /* do samples in pairs of 8 */
- " movdqa (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movdqa 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
+ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
" movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
" movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */
+ SWAP_16_2 (%%xmm1, %%xmm3)
VOLUME_32x16 (%%xmm1, %%xmm0)
VOLUME_32x16 (%%xmm3, %%xmm2)
+ SWAP_16_2 (%%xmm0, %%xmm2)
" movq %%xmm0, (%0) \n\t"
" movq %%xmm2, 8(%0) \n\t"
" add $16, %0 \n\t"
@@ -199,29 +303,6 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#if 0
static void
-pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int16_t);
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) PA_INT16_SWAP(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = PA_INT16_SWAP((int16_t) t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
pa_volume_float32ne_sse (float *samples, float *volumes, unsigned channels, unsigned length)
{
unsigned channel;
@@ -378,24 +459,25 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
-#undef RUN_TEST
+#define RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2
#define SAMPLES 1021
#define TIMES 1000
+#define PADDING 16
static void run_test (void) {
int16_t samples[SAMPLES];
int16_t samples_ref[SAMPLES];
int16_t samples_orig[SAMPLES];
- int32_t volumes[CHANNELS + 16];
+ int32_t volumes[CHANNELS + PADDING];
int i, j, padding;
pa_do_volume_func_t func;
- func = pa_get_volume_func (PA_SAMPLE_S16NE);
+ func = pa_get_volume_func (PA_SAMPLE_S16RE);
- printf ("checking %d\n", sizeof (samples));
+ printf ("checking SSE %d\n", sizeof (samples));
for (j = 0; j < TIMES; j++) {
pa_random (samples, sizeof (samples));
@@ -403,12 +485,11 @@ static void run_test (void) {
memcpy (samples_orig, samples, sizeof (samples));
for (i = 0; i < CHANNELS; i++)
- volumes[i] = rand() >> 15;
-
- for (padding = 0; padding < 16; padding++, i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
volumes[i] = volumes[padding];
- pa_volume_s16ne_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
+ pa_volume_s16re_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
for (i = 0; i < SAMPLES; i++) {
@@ -416,11 +497,6 @@ static void run_test (void) {
printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
samples_orig[i], volumes[i % CHANNELS]);
}
-#if 0
- else
- printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
- samples_orig[i], volumes[i % CHANNELS]);
-#endif
}
}
}
@@ -434,4 +510,5 @@ void pa_volume_func_init_sse (void) {
#endif
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
+ pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
}
commit a83f5524fbf2f0fa861d2fae6973f0f42e8c9c25
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 13 17:11:43 2009 +0200
cpu-x86: add cpu detection code and helpers
Add CPU detection code and various macros and typedefs to make it easier to write
64 and 32 bit code.
diff --git a/src/Makefile.am b/src/Makefile.am
index b692e4a..4e90d79 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -825,6 +825,7 @@ libpulsecore_ at PA_MAJORMINORMICRO@_la_SOURCES = \
pulsecore/resampler.c pulsecore/resampler.h \
pulsecore/rtpoll.c pulsecore/rtpoll.h \
pulsecore/sample-util.c pulsecore/sample-util.h \
+ pulsecore/cpu-x86.c \
pulsecore/svolume_c.c \
pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \
pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \
diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c
new file mode 100644
index 0000000..2da31c9
--- /dev/null
+++ b/src/pulsecore/cpu-x86.c
@@ -0,0 +1,122 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdint.h>
+
+#include <pulsecore/log.h>
+
+#include "cpu-x86.h"
+
+#if defined (__i386__) || defined (__amd64__)
+static void
+get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
+{
+ __asm__ __volatile__ (
+ " push %%"PA_REG_b" \n\t"
+ " cpuid \n\t"
+ " mov %%ebx, %%esi \n\t"
+ " pop %%"PA_REG_b" \n\t"
+
+ : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
+ : "0" (op));
+}
+#endif
+
+static pa_cpu_x86_flag_t pa_cpu_x86_flags;
+
+void pa_cpu_init_x86 (void) {
+#if defined (__i386__) || defined (__amd64__)
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t level;
+
+ /* get standard level */
+ get_cpuid (0x00000000, &level, &ebx, &ecx, &edx);
+ if (level >= 1) {
+ get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
+
+ if (edx & (1<<23))
+ pa_cpu_x86_flags |= PA_CPU_X86_MMX;
+
+ if (edx & (1<<25))
+ pa_cpu_x86_flags |= PA_CPU_X86_SSE;
+
+ if (edx & (1<<26))
+ pa_cpu_x86_flags |= PA_CPU_X86_SSE2;
+
+ if (ecx & (1<<0))
+ pa_cpu_x86_flags |= PA_CPU_X86_SSE3;
+
+ if (ecx & (1<<9))
+ pa_cpu_x86_flags |= PA_CPU_X86_SSSE3;
+
+ if (ecx & (1<<19))
+ pa_cpu_x86_flags |= PA_CPU_X86_SSE4_1;
+
+ if (ecx & (1<<20))
+ pa_cpu_x86_flags |= PA_CPU_X86_SSE4_2;
+ }
+
+ /* get extended level */
+ get_cpuid (0x80000000, &level, &ebx, &ecx, &edx);
+ if (level >= 0x80000001) {
+ get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
+
+ if (edx & (1<<22))
+ pa_cpu_x86_flags |= PA_CPU_X86_MMXEXT;
+
+ if (edx & (1<<23))
+ pa_cpu_x86_flags |= PA_CPU_X86_MMX;
+
+ if (edx & (1<<30))
+ pa_cpu_x86_flags |= PA_CPU_X86_3DNOWEXT;
+
+ if (edx & (1<<31))
+ pa_cpu_x86_flags |= PA_CPU_X86_3DNOW;
+ }
+
+ pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
+ (pa_cpu_x86_flags & PA_CPU_X86_MMX) ? "MMX " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_SSE) ? "SSE " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
+ (pa_cpu_x86_flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
+
+ /* activate various optimisations */
+ if (pa_cpu_x86_flags & PA_CPU_X86_MMX) {
+ pa_volume_func_init_mmx (pa_cpu_x86_flags);
+ }
+ if (pa_cpu_x86_flags & PA_CPU_X86_SSE) {
+ pa_volume_func_init_sse (pa_cpu_x86_flags);
+ }
+#else
+ pa_cpu_x86_flags = 0;
+#endif
+}
diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h
new file mode 100644
index 0000000..8158ea7
--- /dev/null
+++ b/src/pulsecore/cpu-x86.h
@@ -0,0 +1,61 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#include <stdint.h>
+
+typedef enum pa_cpu_x86_flag {
+ PA_CPU_X86_MMX = (1 << 0),
+ PA_CPU_X86_MMXEXT = (1 << 1),
+ PA_CPU_X86_SSE = (1 << 2),
+ PA_CPU_X86_SSE2 = (1 << 3),
+ PA_CPU_X86_SSE3 = (1 << 4),
+ PA_CPU_X86_SSSE3 = (1 << 5),
+ PA_CPU_X86_SSE4_1 = (1 << 6),
+ PA_CPU_X86_SSE4_2 = (1 << 7),
+ PA_CPU_X86_3DNOW = (1 << 8),
+ PA_CPU_X86_3DNOWEXT = (1 << 9)
+} pa_cpu_x86_flag_t;
+
+void pa_cpu_init_x86 (void);
+
+
+#if defined (__i386__)
+typedef int32_t pa_reg_x86;
+#define PA_REG_a "eax"
+#define PA_REG_b "ebx"
+#define PA_REG_c "ecx"
+#define PA_REG_d "edx"
+#define PA_REG_D "edi"
+#define PA_REG_S "esi"
+#elif defined (__amd64__)
+typedef int64_t pa_reg_x86;
+#define PA_REG_a "rax"
+#define PA_REG_b "rbx"
+#define PA_REG_c "rcx"
+#define PA_REG_d "rdx"
+#define PA_REG_D "rdi"
+#define PA_REG_S "rsi"
+#endif
+
+/* some optimized functions */
+void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags);
+void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags);
commit 563cb2dea9f7f73180e2b8cc8d45b0df9358c936
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 13 17:12:44 2009 +0200
main: hook up cpu detection code
Add CPU detection code to activate the various optimisations.
Move some method definitions around.
Use compatibility macros when we can.
diff --git a/src/daemon/main.c b/src/daemon/main.c
index 3c5f7f9..774b4e9 100644
--- a/src/daemon/main.c
+++ b/src/daemon/main.c
@@ -95,6 +95,7 @@
#ifdef HAVE_DBUS
#include <pulsecore/dbus-shared.h>
#endif
+#include <pulsecore/cpu-x86.h>
#include "cmdline.h"
#include "cpulimit.h"
@@ -821,8 +822,7 @@ int main(int argc, char *argv[]) {
pa_memtrap_install();
- pa_volume_func_init_mmx();
- pa_volume_func_init_sse();
+ pa_cpu_init_x86();
pa_assert_se(mainloop = pa_mainloop_new());
diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h
index 563dbb6..34df5cf 100644
--- a/src/pulsecore/sample-util.h
+++ b/src/pulsecore/sample-util.h
@@ -91,9 +91,6 @@ typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned chan
pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f);
void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func);
-void pa_volume_func_init_mmx(void);
-void pa_volume_func_init_sse(void);
-
#define PA_CHANNEL_POSITION_MASK_LEFT \
(PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \
| PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_REAR_LEFT) \
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index 3c22945..e56f7c3 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -31,6 +31,8 @@
#include <pulsecore/g711.h>
#include <pulsecore/core-util.h>
+#include "cpu-x86.h"
+
#include "sample-util.h"
#include "endianmacros.h"
@@ -142,7 +144,7 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig
static void
pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- int64_t channel, temp;
+ pa_reg_x86 channel, temp;
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
@@ -203,8 +205,8 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"6: \n\t"
" emms \n\t"
- : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp)
- : "r" ((int64_t)channels)
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
+ : "r" ((pa_reg_x86)channels)
: "cc"
);
}
@@ -212,7 +214,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
static void
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- int64_t channel, temp;
+ pa_reg_x86 channel, temp;
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
@@ -279,8 +281,8 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"6: \n\t"
" emms \n\t"
- : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp)
- : "r" ((int64_t)channels)
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
+ : "r" ((pa_reg_x86)channels)
: "cc"
);
}
@@ -443,7 +445,7 @@ pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
-#define RUN_TEST
+#undef RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2
@@ -486,7 +488,7 @@ static void run_test (void) {
}
#endif
-void pa_volume_func_init_mmx (void) {
+void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
pa_log_info("Initialising MMX optimized functions.");
#ifdef RUN_TEST
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index ff583a0..b60471a 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -31,6 +31,8 @@
#include <pulsecore/g711.h>
#include <pulsecore/core-util.h>
+#include "cpu-x86.h"
+
#include "sample-util.h"
#include "endianmacros.h"
@@ -140,8 +142,7 @@ pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsig
static void
pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- int64_t channel;
- int64_t temp;
+ pa_reg_x86 channel, temp;
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
@@ -210,7 +211,7 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"8: \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
- : "r" ((int64_t)channels)
+ : "r" ((pa_reg_x86)channels)
: "cc"
);
}
@@ -218,8 +219,7 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
static void
pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- int64_t channel;
- int64_t temp;
+ pa_reg_x86 channel, temp;
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
@@ -296,7 +296,7 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"8: \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
- : "r" ((int64_t)channels)
+ : "r" ((pa_reg_x86)channels)
: "cc"
);
}
@@ -459,7 +459,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
-#define RUN_TEST
+#undef RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2
@@ -502,7 +502,7 @@ static void run_test (void) {
}
#endif
-void pa_volume_func_init_sse (void) {
+void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
pa_log_info("Initialising SSE optimized functions.");
#ifdef RUN_TEST
commit e396fe67fb3b0acec40c2334c426bcb284163d20
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 13 17:22:39 2009 +0200
cpu-x86: guard header with ifdef
diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h
index 8158ea7..07e630e 100644
--- a/src/pulsecore/cpu-x86.h
+++ b/src/pulsecore/cpu-x86.h
@@ -1,3 +1,6 @@
+#ifndef foocpux86hfoo
+#define foocpux86hfoo
+
/***
This file is part of PulseAudio.
@@ -59,3 +62,5 @@ typedef int64_t pa_reg_x86;
/* some optimized functions */
void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags);
void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags);
+
+#endif /* foocpux86hfoo */
commit dcae9a3113d1ce30e330c97dd5a81fec4e272bed
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Fri Aug 14 13:12:30 2009 +0200
svolume: add some comments
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index e56f7c3..b36fe94 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -101,6 +101,22 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig
}
#endif
+/* in s: 2 int16_t samples
+ * in v: 2 int32_t volumes, fixed point 16:16
+ * out s: contains scaled and clamped int16_t samples.
+ *
+ * We calculate the high 32 bits of a 32x16 multiply which we then
+ * clamp to 16 bits. The calculation is:
+ *
+ * vl = (v & 0xffff)
+ * vh = (v >> 16)
+ * s = ((s * vl) >> 16) + (s * vh);
+ *
+ * For the first multiply we have to do a sign correction as we need to
+ * multiply a signed int with an unsigned int. Hacker's delight 8-3 gives a
+ * simple formula to correct the sign of the high word after the signed
+ * multiply.
+ */
#define VOLUME_32x16(s,v) /* .. | vh | vl | */ \
" pxor %%mm4, %%mm4 \n\t" /* .. | 0 | 0 | */ \
" punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \
@@ -116,6 +132,8 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig
" pmaddwd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \
" packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */
+/* approximately advances %3 = (%3 + a) % b. This function requires that
+ * a <= b. */
#define MOD_ADD(a,b) \
" add "#a", %3 \n\t" \
" mov %3, %4 \n\t" \
commit a1235446a733164f00a96688784913172456a34e
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Fri Aug 14 15:19:26 2009 +0200
volume: make the benchmark more meaningfull
MMX is about 6x faster, SSE around 15x on my machine.
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index b36fe94..9ad7dea 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -24,8 +24,7 @@
#include <config.h>
#endif
-#include <alloca.h>
-
+#include <pulse/timeval.h>
#include <pulsecore/random.h>
#include <pulsecore/macro.h>
#include <pulsecore/g711.h>
@@ -478,31 +477,45 @@ static void run_test (void) {
int32_t volumes[CHANNELS + PADDING];
int i, j, padding;
pa_do_volume_func_t func;
+ struct timeval start, stop;
- func = pa_get_volume_func (PA_SAMPLE_S16RE);
-
- printf ("checking MMX %d\n", sizeof (samples));
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
- for (j = 0; j < TIMES; j++) {
- pa_random (samples, sizeof (samples));
- memcpy (samples_ref, samples, sizeof (samples));
- memcpy (samples_orig, samples, sizeof (samples));
+ printf ("checking MMX %zd\n", sizeof (samples));
- for (i = 0; i < CHANNELS; i++)
- volumes[i] = rand() >> 1;
- for (padding = 0; padding < PADDING; padding++, i++)
- volumes[i] = volumes[padding];
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
- pa_volume_s16re_mmx (samples, volumes, CHANNELS, sizeof (samples));
- func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ for (i = 0; i < CHANNELS; i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
+ volumes[i] = volumes[padding];
- for (i = 0; i < SAMPLES; i++) {
- if (samples[i] != samples_ref[i]) {
- printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
- samples_orig[i], volumes[i % CHANNELS]);
- }
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
}
}
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples, samples_orig, sizeof (samples));
+ pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples_ref, samples_orig, sizeof (samples));
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index b60471a..8138c6c 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -24,8 +24,7 @@
#include <config.h>
#endif
-#include <alloca.h>
-
+#include <pulse/timeval.h>
#include <pulsecore/random.h>
#include <pulsecore/macro.h>
#include <pulsecore/g711.h>
@@ -459,7 +458,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
-#undef RUN_TEST
+#define RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2
@@ -474,31 +473,45 @@ static void run_test (void) {
int32_t volumes[CHANNELS + PADDING];
int i, j, padding;
pa_do_volume_func_t func;
+ struct timeval start, stop;
- func = pa_get_volume_func (PA_SAMPLE_S16RE);
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
- printf ("checking SSE %d\n", sizeof (samples));
+ printf ("checking SSE %zd\n", sizeof (samples));
- for (j = 0; j < TIMES; j++) {
- pa_random (samples, sizeof (samples));
- memcpy (samples_ref, samples, sizeof (samples));
- memcpy (samples_orig, samples, sizeof (samples));
-
- for (i = 0; i < CHANNELS; i++)
- volumes[i] = rand() >> 1;
- for (padding = 0; padding < PADDING; padding++, i++)
- volumes[i] = volumes[padding];
-
- pa_volume_s16re_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
- func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t));
-
- for (i = 0; i < SAMPLES; i++) {
- if (samples[i] != samples_ref[i]) {
- printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
- samples_orig[i], volumes[i % CHANNELS]);
- }
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
+
+ for (i = 0; i < CHANNELS; i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
+ volumes[i] = volumes[padding];
+
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
}
}
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples, samples_orig, sizeof (samples));
+ pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples_ref, samples_orig, sizeof (samples));
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif
commit f24c24c14b6614cf19ee916886c8b02384bac435
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Fri Aug 14 15:41:32 2009 +0200
volume: improved comments
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index 9ad7dea..d4fcedf 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -196,7 +196,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
" movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
VOLUME_32x16 (%%mm1, %%mm0)
- " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
@@ -212,8 +212,8 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
VOLUME_32x16 (%%mm1, %%mm0)
VOLUME_32x16 (%%mm3, %%mm2)
- " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
- " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
" dec %2 \n\t"
@@ -270,7 +270,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
SWAP_16 (%%mm1)
VOLUME_32x16 (%%mm1, %%mm0)
SWAP_16 (%%mm0)
- " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
@@ -288,8 +288,8 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
VOLUME_32x16 (%%mm1, %%mm0)
VOLUME_32x16 (%%mm3, %%mm2)
SWAP_16_2 (%%mm0, %%mm2)
- " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */
- " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
" dec %2 \n\t"
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index 8138c6c..d95fa9d 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -231,12 +231,12 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t"
- " movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */
- " movw (%0), %4 \n\t"
+ " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %4 \n\t" /* .. | p0 | */
" rorw $8, %4 \n\t"
" movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0)
- " movd %%xmm0, %4 \n\t"
+ " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
" rorw $8, %4 \n\t"
" movw %4, (%0) \n\t"
" add $2, %0 \n\t"
@@ -244,31 +244,34 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
- " test $1, %2 \n\t" /* check for odd samples */
+ " test $1, %2 \n\t"
" je 4f \n\t"
- "3: \n\t" /* do samples in pairs of 2 */
- " movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0)
- " movd %%xmm0, (%0) \n\t"
+ " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
"4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
- " test $1, %2 \n\t" /* check for odd samples */
+ " test $1, %2 \n\t"
" je 6f \n\t"
- "5: \n\t" /* do samples in pairs of 4 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ /* FIXME, we can do aligned access of the volume values if we can guarantee
+ * that the array is 16 bytes aligned, we probably have to do the odd values
+ * after this then. */
+ "5: \n\t" /* do samples in groups of 4 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0)
- " movq %%xmm0, (%0) \n\t"
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
@@ -277,17 +280,17 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" cmp $0, %2 \n\t"
" je 8f \n\t"
- "7: \n\t" /* do samples in pairs of 8 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
- " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
- " movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */
+ "7: \n\t" /* do samples in groups of 8 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
SWAP_16_2 (%%xmm1, %%xmm3)
VOLUME_32x16 (%%xmm1, %%xmm0)
VOLUME_32x16 (%%xmm3, %%xmm2)
SWAP_16_2 (%%xmm0, %%xmm2)
- " movq %%xmm0, (%0) \n\t"
- " movq %%xmm2, 8(%0) \n\t"
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
" add $16, %0 \n\t"
MOD_ADD ($8, %5)
" dec %2 \n\t"
@@ -458,7 +461,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
-#define RUN_TEST
+#undef RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2
commit 591baacba5913de32e6556a71a8300d25addbec4
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Fri Aug 14 15:48:10 2009 +0200
volume: remove ref functions
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index d4fcedf..ad53927 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -35,71 +35,6 @@
#include "sample-util.h"
#include "endianmacros.h"
-#if 0
-static void
-pa_volume_u8_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) *samples - 0x80;
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
- *samples++ = (uint8_t) (t + 0x80);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_alaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) st_alaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) st_ulaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-#endif
-
/* in s: 2 int16_t samples
* in v: 2 int32_t volumes, fixed point 16:16
* out s: contains scaled and clamped int16_t samples.
@@ -304,164 +239,6 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
);
}
-#if 0
-static void
-pa_volume_float32ne_mmx (float *samples, float *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (float);
-
- for (channel = 0; length; length--) {
- *samples++ *= volumes[channel];
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_float32re_mmx (float *samples, float *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (float);
-
- for (channel = 0; length; length--) {
- float t;
-
- t = PA_FLOAT32_SWAP(*samples);
- t *= volumes[channel];
- *samples++ = PA_FLOAT32_SWAP(t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s32ne_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t)(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = (int32_t) t;
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s32re_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) PA_INT32_SWAP(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_INT32_SWAP((int32_t) t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24ne_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
- uint8_t *e;
-
- e = samples + length;
-
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
-
- t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24re_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
- uint8_t *e;
-
- e = samples + length;
-
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
-
- t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24_32ne_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (uint32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) ((int32_t) (*samples << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (uint32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-#endif
-
#undef RUN_TEST
#ifdef RUN_TEST
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index d95fa9d..b0a6e0d 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -35,71 +35,6 @@
#include "sample-util.h"
#include "endianmacros.h"
-#if 0
-static void
-pa_volume_u8_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) *samples - 0x80;
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
- *samples++ = (uint8_t) (t + 0x80);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_alaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) st_alaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
-
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
-
- t = (int32_t) st_ulaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-#endif
-
#define VOLUME_32x16(s,v) /* .. | vh | vl | */ \
" pxor %%xmm4, %%xmm4 \n\t" /* .. | 0 | 0 | */ \
" punpcklwd %%xmm4, "#s" \n\t" /* .. | 0 | p0 | */ \
@@ -303,164 +238,6 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
);
}
-#if 0
-static void
-pa_volume_float32ne_sse (float *samples, float *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (float);
-
- for (channel = 0; length; length--) {
- *samples++ *= volumes[channel];
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_float32re_sse (float *samples, float *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (float);
-
- for (channel = 0; length; length--) {
- float t;
-
- t = PA_FLOAT32_SWAP(*samples);
- t *= volumes[channel];
- *samples++ = PA_FLOAT32_SWAP(t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s32ne_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t)(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = (int32_t) t;
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s32re_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (int32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) PA_INT32_SWAP(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_INT32_SWAP((int32_t) t);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24ne_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
- uint8_t *e;
-
- e = samples + length;
-
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
-
- t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24re_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
- uint8_t *e;
-
- e = samples + length;
-
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
-
- t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24_32ne_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (uint32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) ((int32_t) (*samples << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-
-static void
-pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
-{
- unsigned channel;
-
- length /= sizeof (uint32_t);
-
- for (channel = 0; length; length--) {
- int64_t t;
-
- t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
-
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
-}
-#endif
-
#undef RUN_TEST
#ifdef RUN_TEST
commit 25724cdd40283a00e6edd9449d0f3cf16823b41b
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Fri Aug 14 19:45:39 2009 +0200
Get rid of liboil
Get rid of the liboil dependency and reimplement the liboil functions with an
equivalent C implementation. Note that most of these functions are deprecated in
liboil and that none of them had any optimisations. We can further specialize
our handrolled versions for some extra speedups.
diff --git a/configure.ac b/configure.ac
index 05312d3..40455e1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -941,12 +941,6 @@ AC_SUBST(AVAHI_LIBS)
AC_SUBST(HAVE_AVAHI)
AM_CONDITIONAL([HAVE_AVAHI], [test "x$HAVE_AVAHI" = x1])
-### LIBOIL ####
-
-PKG_CHECK_MODULES(LIBOIL, [ liboil-0.3 >= 0.3.0 ])
-AC_SUBST(LIBOIL_CFLAGS)
-AC_SUBST(LIBOIL_LIBS)
-
### JACK (optional) ####
AC_ARG_ENABLE([jack],
diff --git a/src/daemon/main.c b/src/daemon/main.c
index 774b4e9..31e434d 100644
--- a/src/daemon/main.c
+++ b/src/daemon/main.c
@@ -39,8 +39,6 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include <liboil/liboil.h>
-
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
@@ -863,8 +861,6 @@ int main(int argc, char *argv[]) {
win32_timer = pa_mainloop_get_api(mainloop)->rtclock_time_new(pa_mainloop_get_api(mainloop), pa_gettimeofday(&win32_tv), message_cb, NULL);
#endif
- oil_init();
-
if (!conf->no_cpu_limit)
pa_assert_se(pa_cpu_limit_init(pa_mainloop_get_api(mainloop)) == 0);
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 59e0a0c..a3c17f8 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -31,9 +31,6 @@
#include <speex/speex_resampler.h>
-#include <liboil/liboilfuncs.h>
-#include <liboil/liboil.h>
-
#include <pulse/xmalloc.h>
#include <pulsecore/sconv.h>
#include <pulsecore/log.h>
@@ -1045,33 +1042,46 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
return &r->buf1;
}
-static void vectoradd_s16_with_fraction(
- int16_t *d, int dstr,
- const int16_t *s1, int sstr1,
- const int16_t *s2, int sstr2,
- int n,
- float s3, float s4) {
+static void vectoradd_f32(
+ float *d, int dstr,
+ const float *s, int sstr,
+ int n, float s4) {
- int32_t i3, i4;
+ for (; n > 0; n--) {
+ *d = (float) (*d + (s4 * *s));
- i3 = (int32_t) (s3 * 0x10000);
- i4 = (int32_t) (s4 * 0x10000);
+ s = (const float*) ((const uint8_t*) s + sstr);
+ d = (float*) ((uint8_t*) d + dstr);
+ }
+}
+
+static void vectoradd_s16(
+ int16_t *d, int dstr,
+ const int16_t *s, int sstr,
+ int n) {
for (; n > 0; n--) {
- int32_t a, b;
+ *d = (int16_t) (*d + *s);
- a = *s1;
- b = *s2;
+ s = (const int16_t*) ((const uint8_t*) s + sstr);
+ d = (int16_t*) ((uint8_t*) d + dstr);
+ }
+}
- a = (a * i3) / 0x10000;
- b = (b * i4) / 0x10000;
+static void vectoradd_s16_with_fraction(
+ int16_t *d, int dstr,
+ const int16_t *s, int sstr,
+ int n, float s4) {
- *d = (int16_t) (a + b);
+ int32_t i4;
- s1 = (const int16_t*) ((const uint8_t*) s1 + sstr1);
- s2 = (const int16_t*) ((const uint8_t*) s2 + sstr2);
- d = (int16_t*) ((uint8_t*) d + dstr);
+ i4 = (int32_t) (s4 * 0x10000);
+
+ for (; n > 0; n--) {
+ *d = (int16_t) (*d + (((int32_t)*s * i4) >> 16));
+ s = (const int16_t*) ((const uint8_t*) s + sstr);
+ d = (int16_t*) ((uint8_t*) d + dstr);
}
}
@@ -1125,12 +1135,11 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
if (r->map_table[oc][ic] <= 0.0)
continue;
- oil_vectoradd_f32(
- (float*) dst + oc, o_skip,
+ vectoradd_f32(
(float*) dst + oc, o_skip,
(float*) src + ic, i_skip,
(int) n_frames,
- &one, &r->map_table[oc][ic]);
+ r->map_table[oc][ic]);
}
}
@@ -1147,23 +1156,19 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
continue;
if (r->map_table[oc][ic] >= 1.0) {
- static const int16_t one = 1;
- oil_vectoradd_s16(
- (int16_t*) dst + oc, o_skip,
+ vectoradd_s16(
(int16_t*) dst + oc, o_skip,
(int16_t*) src + ic, i_skip,
- (int) n_frames,
- &one, &one);
+ (int) n_frames);
} else
vectoradd_s16_with_fraction(
(int16_t*) dst + oc, o_skip,
- (int16_t*) dst + oc, o_skip,
(int16_t*) src + ic, i_skip,
(int) n_frames,
- 1.0f, r->map_table[oc][ic]);
+ r->map_table[oc][ic]);
}
}
@@ -1469,7 +1474,7 @@ static void trivial_resample(pa_resampler *r, const pa_memchunk *input, unsigned
pa_assert(o_index * fz < pa_memblock_get_length(output->memblock));
- oil_memcpy((uint8_t*) dst + fz * o_index,
+ memcpy((uint8_t*) dst + fz * o_index,
(uint8_t*) src + fz * j, (int) fz);
}
diff --git a/src/pulsecore/sconv-s16le.c b/src/pulsecore/sconv-s16le.c
index 43b8cb3..0fefdf1 100644
--- a/src/pulsecore/sconv-s16le.c
+++ b/src/pulsecore/sconv-s16le.c
@@ -28,8 +28,6 @@
#include <inttypes.h>
#include <stdio.h>
-#include <liboil/liboilfuncs.h>
-
#include <pulsecore/sconv.h>
#include <pulsecore/macro.h>
#include <pulsecore/log.h>
@@ -86,17 +84,13 @@ void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *a, float *b) {
pa_assert(b);
#if SWAP_WORDS == 1
-
for (; n > 0; n--) {
int16_t s = *(a++);
*(b++) = ((float) INT16_FROM(s))/(float) 0x7FFF;
}
-
#else
-{
- static const double add = 0, factor = 1.0/0x7FFF;
- oil_scaleconv_f32_s16(b, a, (int) n, &add, &factor);
-}
+ for (; n > 0; n--)
+ *(b++) = ((float) (*(a++)))/(float) 0x7FFF;
#endif
}
@@ -105,17 +99,13 @@ void pa_sconv_s32le_to_float32ne(unsigned n, const int32_t *a, float *b) {
pa_assert(b);
#if SWAP_WORDS == 1
-
for (; n > 0; n--) {
int32_t s = *(a++);
*(b++) = (float) (((double) INT32_FROM(s))/0x7FFFFFFF);
}
-
#else
-{
- static const double add = 0, factor = 1.0/0x7FFFFFFF;
- oil_scaleconv_f32_s32(b, a, (int) n, &add, &factor);
-}
+ for (; n > 0; n--)
+ *(b++) = (float) (((double) (*(a++)))/0x7FFFFFFF);
#endif
}
@@ -124,7 +114,6 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) {
pa_assert(b);
#if SWAP_WORDS == 1
-
for (; n > 0; n--) {
int16_t s;
float v = *(a++);
@@ -133,12 +122,13 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) {
s = (int16_t) lrintf(v * 0x7FFF);
*(b++) = INT16_TO(s);
}
-
#else
-{
- static const double add = 0, factor = 0x7FFF;
- oil_scaleconv_s16_f32(b, a, (int) n, &add, &factor);
-}
+ for (; n > 0; n--) {
+ float v = *(a++);
+
+ v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.f);
+ *(b++) = (int16_t) lrintf(v * 0x7FFF);
+ }
#endif
}
@@ -147,7 +137,6 @@ void pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) {
pa_assert(b);
#if SWAP_WORDS == 1
-
for (; n > 0; n--) {
int32_t s;
float v = *(a++);
@@ -156,12 +145,13 @@ void pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) {
s = (int32_t) lrint((double) v * (double) 0x7FFFFFFF);
*(b++) = INT32_TO(s);
}
-
#else
-{
- static const double add = 0, factor = 0x7FFFFFFF;
- oil_scaleconv_s32_f32(b, a, (int) n, &add, &factor);
-}
+ for (; n > 0; n--) {
+ float v = *(a++);
+
+ v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.0f);
+ *(b++) = (int32_t) lrint((double) v * (double) 0x7FFFFFFF);
+ }
#endif
}
diff --git a/src/pulsecore/sconv.c b/src/pulsecore/sconv.c
index d89f428..937bf5d 100644
--- a/src/pulsecore/sconv.c
+++ b/src/pulsecore/sconv.c
@@ -27,9 +27,6 @@
#include <stdio.h>
#include <stdlib.h>
-#include <liboil/liboilfuncs.h>
-#include <liboil/liboil.h>
-
#include <pulsecore/g711.h>
#include <pulsecore/macro.h>
@@ -41,32 +38,31 @@
/* u8 */
static void u8_to_float32ne(unsigned n, const uint8_t *a, float *b) {
- static const double add = -1, factor = 1.0/128.0;
-
pa_assert(a);
pa_assert(b);
- oil_scaleconv_f32_u8(b, a, (int) n, &add, &factor);
+ for (; n > 0; n--, a++, b++)
+ *b = (*a * 1.0/128.0) - 1.0;
}
static void u8_from_float32ne(unsigned n, const float *a, uint8_t *b) {
- static const double add = 128, factor = 127.0;
-
pa_assert(a);
pa_assert(b);
- oil_scaleconv_u8_f32(b, a, (int) n, &add, &factor);
+ for (; n > 0; n--, a++, b++) {
+ float v;
+ v = (*a * 127.0) + 128.0;
+ v = PA_CLAMP_UNLIKELY (v, 0.0, 255.0);
+ *b = rint (v);
+ }
}
static void u8_to_s16ne(unsigned n, const uint8_t *a, int16_t *b) {
- static const int16_t add = -0x80, factor = 0x100;
-
pa_assert(a);
pa_assert(b);
- oil_conv_s16_u8(b, 2, a, 1, (int) n);
- oil_scalaradd_s16(b, 2, b, 2, &add, (int) n);
- oil_scalarmult_s16(b, 2, b, 2, &factor, (int) n);
+ for (; n > 0; n--, a++, b++)
+ *b = (((int16_t)*a) - 128) << 8;
}
static void u8_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) {
@@ -84,7 +80,7 @@ static void float32ne_to_float32ne(unsigned n, const float *a, float *b) {
pa_assert(a);
pa_assert(b);
- oil_memcpy(b, a, (int) (sizeof(float) * n));
+ memcpy(b, a, (int) (sizeof(float) * n));
}
static void float32re_to_float32ne(unsigned n, const float *a, float *b) {
@@ -101,7 +97,7 @@ static void s16ne_to_s16ne(unsigned n, const int16_t *a, int16_t *b) {
pa_assert(a);
pa_assert(b);
- oil_memcpy(b, a, (int) (sizeof(int16_t) * n));
+ memcpy(b, a, (int) (sizeof(int16_t) * n));
}
static void s16re_to_s16ne(unsigned n, const int16_t *a, int16_t *b) {
diff --git a/src/tests/envelope-test.c b/src/tests/envelope-test.c
index 3af3044..9382040 100644
--- a/src/tests/envelope-test.c
+++ b/src/tests/envelope-test.c
@@ -34,8 +34,6 @@
#include <pulsecore/memblock.h>
#include <pulsecore/sample-util.h>
-#include <liboil/liboil.h>
-
const pa_envelope_def ramp_down = {
.n_points = 2,
.points_x = { 100*PA_USEC_PER_MSEC, 300*PA_USEC_PER_MSEC },
@@ -202,7 +200,6 @@ int main(int argc, char *argv[]) {
.values = { PA_VOLUME_NORM, PA_VOLUME_NORM/2 }
};
- oil_init();
pa_log_set_level(PA_LOG_DEBUG);
pa_assert_se(pool = pa_mempool_new(FALSE, 0));
diff --git a/src/tests/mix-test.c b/src/tests/mix-test.c
index f9f76da..457c4ac 100644
--- a/src/tests/mix-test.c
+++ b/src/tests/mix-test.c
@@ -32,8 +32,6 @@
#include <pulsecore/memblock.h>
#include <pulsecore/sample-util.h>
-#include <liboil/liboil.h>
-
static float swap_float(float a) {
uint32_t *b = (uint32_t*) &a;
*b = PA_UINT32_SWAP(*b);
@@ -211,7 +209,6 @@ int main(int argc, char *argv[]) {
pa_sample_spec a;
pa_cvolume v;
- oil_init();
pa_log_set_level(PA_LOG_DEBUG);
pa_assert_se(pool = pa_mempool_new(FALSE, 0));
diff --git a/src/tests/remix-test.c b/src/tests/remix-test.c
index 9d110d6..4990bf9 100644
--- a/src/tests/remix-test.c
+++ b/src/tests/remix-test.c
@@ -32,8 +32,6 @@
#include <pulsecore/memblock.h>
#include <pulsecore/sample-util.h>
-#include <liboil/liboil.h>
-
int main(int argc, char *argv[]) {
static const pa_channel_map maps[] = {
@@ -55,7 +53,6 @@ int main(int argc, char *argv[]) {
unsigned i, j;
pa_mempool *pool;
- oil_init();
pa_log_set_level(PA_LOG_DEBUG);
pa_assert_se(pool = pa_mempool_new(FALSE, 0));
diff --git a/src/tests/resampler-test.c b/src/tests/resampler-test.c
index 7236265..82198b5 100644
--- a/src/tests/resampler-test.c
+++ b/src/tests/resampler-test.c
@@ -32,8 +32,6 @@
#include <pulsecore/memblock.h>
#include <pulsecore/sample-util.h>
-#include <liboil/liboil.h>
-
static void dump_block(const pa_sample_spec *ss, const pa_memchunk *chunk) {
void *d;
unsigned i;
@@ -248,7 +246,6 @@ int main(int argc, char *argv[]) {
pa_sample_spec a, b;
pa_cvolume v;
- oil_init();
pa_log_set_level(PA_LOG_DEBUG);
pa_assert_se(pool = pa_mempool_new(FALSE, 0));
commit 601e5f1867065912e1740e2408a948ca818f6c59
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Mon Aug 17 11:35:47 2009 +0200
resampler: cache integer channel_map
Calculate and cache an integer version of the channel map so that we don't have
to regenerate it when dealing with s16 samples.
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index a3c17f8..e3473ac 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -61,7 +61,8 @@ struct pa_resampler {
pa_convert_func_t to_work_format_func;
pa_convert_func_t from_work_format_func;
- float map_table[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
pa_bool_t map_required;
void (*impl_free)(pa_resampler *r);
@@ -587,7 +588,8 @@ static void calc_map_table(pa_resampler *r) {
if (!(r->map_required = (r->i_ss.channels != r->o_ss.channels || (!(r->flags & PA_RESAMPLER_NO_REMAP) && !pa_channel_map_equal(&r->i_cm, &r->o_cm)))))
return;
- memset(r->map_table, 0, sizeof(r->map_table));
+ memset(r->map_table_f, 0, sizeof(r->map_table_f));
+ memset(r->map_table_i, 0, sizeof(r->map_table_i));
memset(ic_connected, 0, sizeof(ic_connected));
remix = (r->flags & (PA_RESAMPLER_NO_REMAP|PA_RESAMPLER_NO_REMIX)) == 0;
@@ -602,7 +604,7 @@ static void calc_map_table(pa_resampler *r) {
/* We shall not do any remapping. Hence, just check by index */
if (ic == oc)
- r->map_table[oc][ic] = 1.0;
+ r->map_table_f[oc][ic] = 1.0;
continue;
}
@@ -611,7 +613,7 @@ static void calc_map_table(pa_resampler *r) {
/* We shall not do any remixing. Hence, just check by name */
if (a == b)
- r->map_table[oc][ic] = 1.0;
+ r->map_table_f[oc][ic] = 1.0;
continue;
}
@@ -686,7 +688,7 @@ static void calc_map_table(pa_resampler *r) {
*/
if (a == b || a == PA_CHANNEL_POSITION_MONO || b == PA_CHANNEL_POSITION_MONO) {
- r->map_table[oc][ic] = 1.0;
+ r->map_table_f[oc][ic] = 1.0;
oc_connected = TRUE;
ic_connected[ic] = TRUE;
@@ -711,7 +713,7 @@ static void calc_map_table(pa_resampler *r) {
if (n > 0)
for (ic = 0; ic < r->i_ss.channels; ic++)
if (on_left(r->i_cm.map[ic])) {
- r->map_table[oc][ic] = 1.0f / (float) n;
+ r->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
@@ -732,7 +734,7 @@ static void calc_map_table(pa_resampler *r) {
if (n > 0)
for (ic = 0; ic < r->i_ss.channels; ic++)
if (on_right(r->i_cm.map[ic])) {
- r->map_table[oc][ic] = 1.0f / (float) n;
+ r->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
@@ -753,7 +755,7 @@ static void calc_map_table(pa_resampler *r) {
if (n > 0) {
for (ic = 0; ic < r->i_ss.channels; ic++)
if (on_center(r->i_cm.map[ic])) {
- r->map_table[oc][ic] = 1.0f / (float) n;
+ r->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
} else {
@@ -770,7 +772,7 @@ static void calc_map_table(pa_resampler *r) {
if (n > 0)
for (ic = 0; ic < r->i_ss.channels; ic++)
if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic])) {
- r->map_table[oc][ic] = 1.0f / (float) n;
+ r->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
@@ -787,9 +789,9 @@ static void calc_map_table(pa_resampler *r) {
for (ic = 0; ic < r->i_ss.channels; ic++) {
if (!(r->flags & PA_RESAMPLER_NO_LFE))
- r->map_table[oc][ic] = 1.0f / (float) r->i_ss.channels;
+ r->map_table_f[oc][ic] = 1.0f / (float) r->i_ss.channels;
else
- r->map_table[oc][ic] = 0;
+ r->map_table_f[oc][ic] = 0;
/* Please note that a channel connected to LFE
* doesn't really count as connected. */
@@ -836,12 +838,12 @@ static void calc_map_table(pa_resampler *r) {
for (ic = 0; ic < r->i_ss.channels; ic++) {
if (ic_connected[ic]) {
- r->map_table[oc][ic] *= .9f;
+ r->map_table_f[oc][ic] *= .9f;
continue;
}
if (on_left(r->i_cm.map[ic]))
- r->map_table[oc][ic] = .1f / (float) ic_unconnected_left;
+ r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left;
}
}
}
@@ -861,12 +863,12 @@ static void calc_map_table(pa_resampler *r) {
for (ic = 0; ic < r->i_ss.channels; ic++) {
if (ic_connected[ic]) {
- r->map_table[oc][ic] *= .9f;
+ r->map_table_f[oc][ic] *= .9f;
continue;
}
if (on_right(r->i_cm.map[ic]))
- r->map_table[oc][ic] = .1f / (float) ic_unconnected_right;
+ r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right;
}
}
}
@@ -887,12 +889,12 @@ static void calc_map_table(pa_resampler *r) {
for (ic = 0; ic < r->i_ss.channels; ic++) {
if (ic_connected[ic]) {
- r->map_table[oc][ic] *= .9f;
+ r->map_table_f[oc][ic] *= .9f;
continue;
}
if (on_center(r->i_cm.map[ic])) {
- r->map_table[oc][ic] = .1f / (float) ic_unconnected_center;
+ r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center;
mixed_in = TRUE;
}
}
@@ -950,7 +952,7 @@ static void calc_map_table(pa_resampler *r) {
for (ic = 0; ic < r->i_ss.channels; ic++) {
if (ic_connected[ic]) {
- r->map_table[oc][ic] *= .75f;
+ r->map_table_f[oc][ic] *= .75f;
continue;
}
@@ -958,7 +960,7 @@ static void calc_map_table(pa_resampler *r) {
continue;
if (!found_frs[ic] || front_rear_side(r->i_cm.map[ic]) == front_rear_side(r->o_cm.map[oc]))
- r->map_table[oc][ic] = .375f / (float) ncenter[oc];
+ r->map_table_f[oc][ic] = .375f / (float) ncenter[oc];
}
}
}
@@ -975,11 +977,14 @@ static void calc_map_table(pa_resampler *r) {
continue;
for (oc = 0; oc < r->o_ss.channels; oc++)
- r->map_table[oc][ic] = 0.375f / (float) ic_unconnected_lfe;
+ r->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe;
}
}
}
-
+ /* make a 16:16 fixed-point integer version of the matrix */
+ for (oc = 0; oc < r->o_ss.channels; oc++)
+ for (ic = 0; ic < r->i_ss.channels; ic++)
+ r->map_table_i[oc][ic] = (int32_t) (r->map_table_f[oc][ic] * 0x10000);
s = pa_strbuf_new();
@@ -996,7 +1001,7 @@ static void calc_map_table(pa_resampler *r) {
pa_strbuf_printf(s, "O%02u |", oc);
for (ic = 0; ic < r->i_ss.channels; ic++)
- pa_strbuf_printf(s, " %1.3f", r->map_table[oc][ic]);
+ pa_strbuf_printf(s, " %1.3f", r->map_table_f[oc][ic]);
pa_strbuf_puts(s, "\n");
}
@@ -1071,11 +1076,7 @@ static void vectoradd_s16(
static void vectoradd_s16_with_fraction(
int16_t *d, int dstr,
const int16_t *s, int sstr,
- int n, float s4) {
-
- int32_t i4;
-
- i4 = (int32_t) (s4 * 0x10000);
+ int n, int32_t i4) {
for (; n > 0; n--) {
*d = (int16_t) (*d + (((int32_t)*s * i4) >> 16));
@@ -1128,18 +1129,17 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
for (oc = 0; oc < r->o_ss.channels; oc++) {
unsigned ic;
- static const float one = 1.0;
for (ic = 0; ic < r->i_ss.channels; ic++) {
- if (r->map_table[oc][ic] <= 0.0)
+ if (r->map_table_f[oc][ic] <= 0.0)
continue;
vectoradd_f32(
(float*) dst + oc, o_skip,
(float*) src + ic, i_skip,
(int) n_frames,
- r->map_table[oc][ic]);
+ r->map_table_f[oc][ic]);
}
}
@@ -1152,10 +1152,10 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
for (ic = 0; ic < r->i_ss.channels; ic++) {
- if (r->map_table[oc][ic] <= 0.0)
+ if (r->map_table_f[oc][ic] <= 0.0)
continue;
- if (r->map_table[oc][ic] >= 1.0) {
+ if (r->map_table_f[oc][ic] >= 1.0) {
vectoradd_s16(
(int16_t*) dst + oc, o_skip,
@@ -1168,7 +1168,7 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
(int16_t*) dst + oc, o_skip,
(int16_t*) src + ic, i_skip,
(int) n_frames,
- r->map_table[oc][ic]);
+ r->map_table_i[oc][ic]);
}
}
commit a98fa950d2f04e2ba4d4a470296a081e1050f76d
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 15:56:44 2009 +0200
svolume: remove unneeded compare
We don't need the compare because the sub operation already sets the right flags
for us.
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index ad53927..5243b44 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -72,7 +72,6 @@
" add "#a", %3 \n\t" \
" mov %3, %4 \n\t" \
" sub "#b", %4 \n\t" \
- " cmp "#b", %3 \n\t" \
" cmovae %4, %3 \n\t"
/* swap 16 bits */
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index b0a6e0d..98f828c 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -52,8 +52,7 @@
" add "#a", %3 \n\t" /* channel += inc */ \
" mov %3, %4 \n\t" \
" sub "#b", %4 \n\t" /* tmp = channel - channels */ \
- " cmp "#b", %3 \n\t" /* if (channel >= channels) */ \
- " cmovae %4, %3 \n\t" /* channel = tmp */
+ " cmovae %4, %3 \n\t" /* if (channel >= channels), i.e. the sub did not borrow: channel = tmp */
/* swap 16 bits */
#define SWAP_16(s) \
commit 951bf1b28d25a93b99cbe074a46b8313a9e5f9f0
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 16:09:14 2009 +0200
svolume: add ARM optimized volume scaling
diff --git a/src/Makefile.am b/src/Makefile.am
index 4e90d79..eca68b1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -825,8 +825,8 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \
pulsecore/resampler.c pulsecore/resampler.h \
pulsecore/rtpoll.c pulsecore/rtpoll.h \
pulsecore/sample-util.c pulsecore/sample-util.h \
- pulsecore/cpu-x86.c \
- pulsecore/svolume_c.c \
+ pulsecore/cpu-arm.c pulsecore/cpu-x86.c \
+ pulsecore/svolume_c.c pulsecore/svolume_arm.c\
pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \
pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \
pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \
diff --git a/src/daemon/main.c b/src/daemon/main.c
index 31e434d..ec8ff40 100644
--- a/src/daemon/main.c
+++ b/src/daemon/main.c
@@ -93,6 +93,7 @@
#ifdef HAVE_DBUS
#include <pulsecore/dbus-shared.h>
#endif
+#include <pulsecore/cpu-arm.h>
#include <pulsecore/cpu-x86.h>
#include "cmdline.h"
@@ -821,6 +822,7 @@ int main(int argc, char *argv[]) {
pa_memtrap_install();
pa_cpu_init_x86();
+ pa_cpu_init_arm();
pa_assert_se(mainloop = pa_mainloop_new());
diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c
new file mode 100644
index 0000000..75646fe
--- /dev/null
+++ b/src/pulsecore/cpu-arm.c
@@ -0,0 +1,43 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdint.h>
+
+#include <pulsecore/log.h>
+
+#include "cpu-arm.h"
+
+static pa_cpu_arm_flag_t pa_cpu_arm_flags;
+
+void pa_cpu_init_arm (void) {
+#if defined (__arm__)
+ pa_cpu_arm_flags = 0;
+
+ pa_log ("ARM init\n");
+
+ pa_volume_func_init_arm (pa_cpu_arm_flags);
+#endif /* defined (__arm__) */
+}
diff --git a/src/pulsecore/cpu-arm.h b/src/pulsecore/cpu-arm.h
new file mode 100644
index 0000000..1a0ac27
--- /dev/null
+++ b/src/pulsecore/cpu-arm.h
@@ -0,0 +1,39 @@
+#ifndef foocpuarmhfoo
+#define foocpuarmhfoo
+
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#include <stdint.h>
+
+typedef enum pa_cpu_arm_flag { /* ARM CPU capability bits; each constant is a distinct bit so values can be OR-ed together. */
+ PA_CPU_ARM_V6 = (1 << 0), /* presumably: ARMv6 architecture level, confirm against the detection code */
+ PA_CPU_ARM_NEON = (1 << 1), /* presumably: NEON SIMD extension */
+ PA_CPU_ARM_VFP = (1 << 2) /* presumably: VFP floating-point unit */
+} pa_cpu_arm_flag_t;
+
+void pa_cpu_init_arm (void);
+
+/* some optimized functions */
+void pa_volume_func_init_arm(pa_cpu_arm_flag_t flags);
+
+#endif /* foocpuarmhfoo */
diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c
new file mode 100644
index 0000000..7e25a13
--- /dev/null
+++ b/src/pulsecore/svolume_arm.c
@@ -0,0 +1,195 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <pulse/timeval.h>
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "cpu-arm.h"
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+#if defined (__arm__)
+
+#define MOD_INC() /* asm fragment: wrap the volume cursor r6 back into the table once it reaches %2 (the end pointer); clobbers r0 and the flags */ \
+ " subs r0, r6, %2 \n\t" /* r0 = r6 - end; carry is set when r6 >= end (unsigned) */ \
+ " addcs r0, %1 \n\t" /* if so, r0 = table start (%1) + overshoot */ \
+ " movcs r6, r0 \n\t" /* and the cursor continues from there; otherwise r6 is left untouched */
+
+static void /* Scales signed 16-bit native-endian samples in place by per-channel 32-bit volume factors, using ARM inline assembly. */
+pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{ /* samples: buffer rewritten in place; volumes: table of factors cycled through per sample; length: buffer size in bytes (halved below to get a sample count) */
+ int32_t *ve; /* one past the last volume entry used; MOD_INC() wraps the volume cursor against it */
+ /* NOTE(review): raising channels to at least 4 makes the asm read volumes[0..3] even for mono/stereo; presumably callers pad the table by repeating the channel volumes (run_test in this file does), confirm. */
+ channels = MAX (4, channels);
+ ve = volumes + channels;
+ /* Operands: %0 = samples, %1 = volumes, %2 = ve, %3 = length. r6 is the running volume cursor, r0-r5 are scratch. */
+ __asm__ __volatile__ (
+ " mov r6, %1 \n\t" /* volume cursor starts at the first table entry */
+ " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t): bytes -> sample count */
+ " tst %3, #1 \n\t" /* odd number of samples? */
+ " beq 2f \n\t" /* even: skip the single-sample step */
+
+ "1: \n\t" /* single-sample step, executed at most once (nothing branches back to 1) */
+ " ldr r0, [r6], #4 \n\t" /* r0 = one volume, cursor advances by one entry */
+ " ldrh r2, [%0] \n\t" /* r2 = one sample (halfword load) */
+
+ " smulwb r0, r0, r2 \n\t" /* r0 = (r0 * signed bottom halfword of r2) >> 16 */
+ " ssat r0, #16, r0 \n\t" /* saturate to the signed 16-bit range */
+
+ " strh r0, [%0], #2 \n\t" /* write the result back, samples advances by one sample */
+
+ MOD_INC()
+
+ "2: \n\t"
+ " mov %3, %3, LSR #1 \n\t" /* sample count -> count of sample pairs */
+ " tst %3, #1 \n\t" /* odd number of pairs? */
+ " beq 4f \n\t" /* even: skip the two-sample step */
+
+ "3: \n\t" /* two-sample step, executed at most once */
+ " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time: r2, r3 = next two volumes */
+ " ldr r0, [%0] \n\t" /* r0 = two packed 16-bit samples */
+ /* NOTE(review): the first loaded volume (r2) is paired with the top halfword and the second (r3) with the bottom one; verify this matches the sample order in memory for the target endianness. */
+ " smulwt r2, r2, r0 \n\t" /* r2 = (r2 * top halfword of r0) >> 16 */
+ " smulwb r3, r3, r0 \n\t" /* r3 = (r3 * bottom halfword of r0) >> 16 */
+
+ " ssat r2, #16, r2 \n\t"
+ " ssat r3, #16, r3 \n\t"
+
+ " pkhbt r0, r3, r2, LSL #16 \n\t" /* repack: bottom half from r3, top half from r2 */
+ " str r0, [%0], #4 \n\t" /* write both back, samples advances by two samples */
+
+ MOD_INC()
+
+ "4: \n\t"
+ " movs %3, %3, LSR #1 \n\t" /* pairs -> groups of four; sets Z when no group remains */
+ " beq 6f \n\t"
+
+ "5: \n\t" /* main loop: one group of four samples per iteration */
+ " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time: r2, r3 = two volumes */
+ " ldrd r4, [r6], #8 \n\t" /* r4, r5 = two more volumes */
+ " ldrd r0, [%0] \n\t" /* r0, r1 = four packed samples */
+
+ " smulwt r2, r2, r0 \n\t"
+ " smulwb r3, r3, r0 \n\t"
+ " smulwt r4, r4, r1 \n\t"
+ " smulwb r5, r5, r1 \n\t"
+
+ " ssat r2, #16, r2 \n\t"
+ " ssat r3, #16, r3 \n\t"
+ " ssat r4, #16, r4 \n\t"
+ " ssat r5, #16, r5 \n\t"
+
+ " pkhbt r0, r3, r2, LSL #16 \n\t"
+ " pkhbt r1, r5, r4, LSL #16 \n\t"
+ " strd r0, [%0], #8 \n\t" /* write all four back, samples advances by four samples */
+
+ MOD_INC()
+
+ " subs %3, %3, #1 \n\t" /* one group done */
+ " bne 5b \n\t"
+ "6: \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length) /* all four operands are read-write registers */
+ : /* no input-only operands */
+ : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc" /* NOTE(review): the asm loads and stores through %0 but declares no memory clobber; verify the compiler cannot cache sample data across it. */
+ );
+}
+
+#undef RUN_TEST
+
+#ifdef RUN_TEST
+#define CHANNELS 2
+#define SAMPLES 1023
+#define TIMES 1000
+#define PADDING 16
+
+static void run_test (void) { /* Self-test and micro-benchmark for pa_volume_s16ne_arm; only compiled when RUN_TEST is defined. */
+ int16_t samples[SAMPLES]; /* buffer processed by the ARM routine */
+ int16_t samples_ref[SAMPLES]; /* buffer processed by the reference routine */
+ int16_t samples_orig[SAMPLES]; /* untouched copy of the input, used for reporting and for resetting */
+ int32_t volumes[CHANNELS + PADDING]; /* channel volumes followed by PADDING repeated entries */
+ int i, j, padding;
+ pa_do_volume_func_t func;
+ struct timeval start, stop;
+ /* presumably the implementation currently registered for S16NE; it serves as the reference here */
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
+ /* NOTE(review): sizeof yields size_t, for which the matching conversion is %zu rather than %zd. */
+ printf ("checking ARM %zd\n", sizeof (samples));
+ /* Fill the input with random data and keep two extra copies of it. */
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
+ /* Random volumes for the real channels, then pad the table by repeating it from the start. */
+ for (i = 0; i < CHANNELS; i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
+ volumes[i] = volumes[padding];
+ /* Run both implementations once (length is passed in bytes) and print every sample where they differ. */
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+ }
+ }
+ /* Time TIMES runs of the ARM routine; the input is restored before each run. */
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples, samples_orig, sizeof (samples));
+ pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ /* Same measurement for the reference routine. */
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples_ref, samples_orig, sizeof (samples));
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+}
+#endif
+
+#endif /* defined (__arm__) */
+
+
+void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) { /* Registers the ARM volume routine; flags is accepted but not read in this body. */
+#if defined (__arm__) /* on other architectures the function is an empty stub */
+ pa_log_info("Initialising ARM optimized functions.");
+
+#ifdef RUN_TEST /* optional self-test; RUN_TEST is #undef'd earlier in this file, so this is normally skipped */
+ run_test ();
+#endif
+ /* Install the assembly routine as the handler for native-endian signed 16-bit samples. */
+ pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
+#endif /* defined (__arm__) */
+}
commit bd49d43bd387758f151c56b7ed1643ecb72c0258
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 16:09:48 2009 +0200
svolume: add CPU guards around code
Mark code that should only be compiled on x86 CPUs with proper defines.
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index 5243b44..fb4c82c 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -35,6 +35,7 @@
#include "sample-util.h"
#include "endianmacros.h"
+#if defined (__i386__) || defined (__amd64__)
/* in s: 2 int16_t samples
* in v: 2 int32_t volumes, fixed point 16:16
* out s: contains scaled and clamped int16_t samples.
@@ -295,7 +296,11 @@ static void run_test (void) {
}
#endif
+#endif /* defined (__i386__) || defined (__amd64__) */
+
+
void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
pa_log_info("Initialising MMX optimized functions.");
#ifdef RUN_TEST
@@ -304,4 +309,5 @@ void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
+#endif /* defined (__i386__) || defined (__amd64__) */
}
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index 98f828c..141c466 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -35,6 +35,8 @@
#include "sample-util.h"
#include "endianmacros.h"
+#if defined (__i386__) || defined (__amd64__)
+
#define VOLUME_32x16(s,v) /* .. | vh | vl | */ \
" pxor %%xmm4, %%xmm4 \n\t" /* .. | 0 | 0 | */ \
" punpcklwd %%xmm4, "#s" \n\t" /* .. | 0 | p0 | */ \
@@ -293,8 +295,10 @@ static void run_test (void) {
pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif
+#endif /* defined (__i386__) || defined (__amd64__) */
void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
pa_log_info("Initialising SSE optimized functions.");
#ifdef RUN_TEST
@@ -303,4 +307,5 @@ void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
+#endif /* defined (__i386__) || defined (__amd64__) */
}
commit b4e9942c2f3929b4baf4b53b0561102af7845269
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 16:15:18 2009 +0200
resample: refactor the channel remapping a little
Factor out the channel remap matrix code into a separate function.
Keep a pointer to the channel remapping function so we can install custom
functions.
Catch the common mono->stereo remapping case and install a custom, more
optimized function.
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index e3473ac..4fb03ce 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -44,6 +44,11 @@
/* Number of samples of extra space we allow the resamplers to return */
#define EXTRA_FRAMES 128
+typedef void (*pa_do_remap_func_t) (pa_resampler *r, void *d, const void *s, unsigned n);
+
+static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n);
+static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n);
+
struct pa_resampler {
pa_resample_method_t method;
pa_resample_flags_t flags;
@@ -64,6 +69,7 @@ struct pa_resampler {
float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
pa_bool_t map_required;
+ pa_do_remap_func_t do_remap;
void (*impl_free)(pa_resampler *r);
void (*impl_update_rates)(pa_resampler *r);
@@ -1008,6 +1014,17 @@ static void calc_map_table(pa_resampler *r) {
pa_log_debug("Channel matrix:\n%s", t = pa_strbuf_tostring_free(s));
pa_xfree(t);
+
+ /* find some common channel remappings, fall back to full matrix operation. */
+ if (r->i_ss.channels == 1 && r->o_ss.channels == 2 &&
+ r->map_table_i[0][0] == 1.0 && r->map_table_i[1][0] == 1.0) {
+ r->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;;
+ pa_log_debug("Using mono to stereo remapping");
+ } else {
+ r->do_remap = (pa_do_remap_func_t) remap_channels_matrix;
+ pa_log_debug("Using generic matrix remapping");
+ }
+
}
static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) {
@@ -1047,49 +1064,111 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
return &r->buf1;
}
-static void vectoradd_f32(
- float *d, int dstr,
- const float *s, int sstr,
- int n, float s4) {
+static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) {
+
+ switch (r->work_format) {
+ case PA_SAMPLE_FLOAT32NE:
+ {
+ float *d, *s;
+
+ d = (float *) dst;
+ s = (float *) src;
+
+ for (; n > 0; n--) {
+ *d++ = *s;
+ *d++ = *s++;
+ }
+ break;
+ }
+ case PA_SAMPLE_S16NE:
+ {
+ int16_t *d, *s;
- for (; n > 0; n--) {
- *d = (float) (*d + (s4 * *s));
+ d = (int16_t *) dst;
+ s = (int16_t *) src;
- s = (const float*) ((const uint8_t*) s + sstr);
- d = (float*) ((uint8_t*) d + dstr);
+ for (; n > 0; n--) {
+ *d++ = *s;
+ *d++ = *s++;
+ }
+ break;
+ }
+ default:
+ pa_assert_not_reached();
}
}
-static void vectoradd_s16(
- int16_t *d, int dstr,
- const int16_t *s, int sstr,
- int n) {
+static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n) {
+ unsigned oc;
+ unsigned n_ic, n_oc;
- for (; n > 0; n--) {
- *d = (int16_t) (*d + *s);
+ n_ic = r->i_ss.channels;
+ n_oc = r->o_ss.channels;
- s = (const int16_t*) ((const uint8_t*) s + sstr);
- d = (int16_t*) ((uint8_t*) d + dstr);
- }
-}
+ memset(dst, 0, r->buf2.length);
-static void vectoradd_s16_with_fraction(
- int16_t *d, int dstr,
- const int16_t *s, int sstr,
- int n, int32_t i4) {
+ switch (r->work_format) {
+ case PA_SAMPLE_FLOAT32NE:
+ {
+ float *d, *s;
- for (; n > 0; n--) {
- *d = (int16_t) (*d + (((int32_t)*s * i4) >> 16));
+ for (oc = 0; oc < n_oc; oc++) {
+ unsigned ic;
- s = (const int16_t*) ((const uint8_t*) s + sstr);
- d = (int16_t*) ((uint8_t*) d + dstr);
+ for (ic = 0; ic < n_ic; ic++) {
+ float vol;
+
+ vol = r->map_table_f[oc][ic];
+
+ if (vol <= 0.0)
+ continue;
+
+ d = (float *)dst + oc;
+ s = (float *)src + ic;
+
+ for (; n > 0; n--, s += n_ic, d += n_oc)
+ *d += *s * vol;
+ }
+ }
+
+ break;
+ }
+ case PA_SAMPLE_S16NE:
+ {
+ int16_t *d, *s;
+
+ for (oc = 0; oc < n_oc; oc++) {
+ unsigned ic;
+
+ for (ic = 0; ic < n_ic; ic++) {
+ int32_t vol;
+
+ vol = r->map_table_i[oc][ic];
+
+ if (vol <= 0)
+ continue;
+
+ d = (int16_t *)dst + oc;
+ s = (int16_t *)src + ic;
+
+ if (vol >= 0x10000) {
+ for (; n > 0; n--, s += n_ic, d += n_oc)
+ *d += *s;
+ } else {
+ for (; n > 0; n--, s += n_ic, d += n_oc)
+ *d = (int16_t) (*d + (((int32_t)*s * vol) >> 16));
+ }
+ }
+ }
+ break;
+ }
+ default:
+ pa_assert_not_reached();
}
}
static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
unsigned in_n_samples, out_n_samples, n_frames;
- int i_skip, o_skip;
- unsigned oc;
void *src, *dst;
pa_assert(r);
@@ -1119,70 +1198,12 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
src = ((uint8_t*) pa_memblock_acquire(input->memblock) + input->index);
dst = pa_memblock_acquire(r->buf2.memblock);
- memset(dst, 0, r->buf2.length);
-
- o_skip = (int) (r->w_sz * r->o_ss.channels);
- i_skip = (int) (r->w_sz * r->i_ss.channels);
-
- switch (r->work_format) {
- case PA_SAMPLE_FLOAT32NE:
-
- for (oc = 0; oc < r->o_ss.channels; oc++) {
- unsigned ic;
-
- for (ic = 0; ic < r->i_ss.channels; ic++) {
-
- if (r->map_table_f[oc][ic] <= 0.0)
- continue;
-
- vectoradd_f32(
- (float*) dst + oc, o_skip,
- (float*) src + ic, i_skip,
- (int) n_frames,
- r->map_table_f[oc][ic]);
- }
- }
-
- break;
-
- case PA_SAMPLE_S16NE:
-
- for (oc = 0; oc < r->o_ss.channels; oc++) {
- unsigned ic;
-
- for (ic = 0; ic < r->i_ss.channels; ic++) {
-
- if (r->map_table_f[oc][ic] <= 0.0)
- continue;
-
- if (r->map_table_f[oc][ic] >= 1.0) {
-
- vectoradd_s16(
- (int16_t*) dst + oc, o_skip,
- (int16_t*) src + ic, i_skip,
- (int) n_frames);
-
- } else
-
- vectoradd_s16_with_fraction(
- (int16_t*) dst + oc, o_skip,
- (int16_t*) src + ic, i_skip,
- (int) n_frames,
- r->map_table_i[oc][ic]);
- }
- }
-
- break;
-
- default:
- pa_assert_not_reached();
- }
+ pa_assert (r->do_remap);
+ r->do_remap (r, dst, src, n_frames);
pa_memblock_release(input->memblock);
pa_memblock_release(r->buf2.memblock);
- r->buf2.length = out_n_samples * r->w_sz;
-
return &r->buf2;
}
commit d04a6e935f8352a4ffd93cb1aeddac8f605a099a
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 16:23:55 2009 +0200
resample: fix counters
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 4fb03ce..cc57b54 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -1099,7 +1099,7 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un
}
static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n) {
- unsigned oc;
+ unsigned oc, i;
unsigned n_ic, n_oc;
n_ic = r->i_ss.channels;
@@ -1126,7 +1126,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
d = (float *)dst + oc;
s = (float *)src + ic;
- for (; n > 0; n--, s += n_ic, d += n_oc)
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += *s * vol;
}
}
@@ -1152,10 +1152,10 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
s = (int16_t *)src + ic;
if (vol >= 0x10000) {
- for (; n > 0; n--, s += n_ic, d += n_oc)
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += *s;
} else {
- for (; n > 0; n--, s += n_ic, d += n_oc)
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d = (int16_t) (*d + (((int32_t)*s * vol) >> 16));
}
}
commit 548b735ccf8474ebe60137375cdda4e58582efc3
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 17:24:23 2009 +0200
resampler: fix identity check
Fix the identity matrix check for mono to stereo.
Help the compiler generate better code for the C implementation of the channel
remapping code.
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index cc57b54..2256516 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -1017,7 +1017,7 @@ static void calc_map_table(pa_resampler *r) {
/* find some common channel remappings, fall back to full matrix operation. */
if (r->i_ss.channels == 1 && r->o_ss.channels == 2 &&
- r->map_table_i[0][0] == 1.0 && r->map_table_i[1][0] == 1.0) {
+ r->map_table_f[0][0] >= 1.0 && r->map_table_f[1][0] >= 1.0) {
r->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;;
pa_log_debug("Using mono to stereo remapping");
} else {
@@ -1074,10 +1074,8 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un
d = (float *) dst;
s = (float *) src;
- for (; n > 0; n--) {
- *d++ = *s;
- *d++ = *s++;
- }
+ for (; n > 0; n--, s++, d += 2)
+ d[0] = d[1] = *s;
break;
}
case PA_SAMPLE_S16NE:
@@ -1087,10 +1085,8 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un
d = (int16_t *) dst;
s = (int16_t *) src;
- for (; n > 0; n--) {
- *d++ = *s;
- *d++ = *s++;
- }
+ for (; n > 0; n--, s++, d += 2)
+ d[0] = d[1] = *s;
break;
}
default:
@@ -1156,7 +1152,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
*d += *s;
} else {
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
- *d = (int16_t) (*d + (((int32_t)*s * vol) >> 16));
+ *d += (int16_t) (((int32_t)*s * vol) >> 16);
}
}
}
commit d2389ef96e21825bb4e945f6c71b5bd27c5fa2b4
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 17:27:17 2009 +0200
sample: manually inline table lookups
Manually inline some table lookups to avoid excessive calls to
pa_sample_spec_valid().
diff --git a/src/pulse/sample.c b/src/pulse/sample.c
index d5d38ed..9698d8a 100644
--- a/src/pulse/sample.c
+++ b/src/pulse/sample.c
@@ -36,28 +36,27 @@
#include "sample.h"
-size_t pa_sample_size_of_format(pa_sample_format_t f) {
-
- static const size_t table[] = {
- [PA_SAMPLE_U8] = 1,
- [PA_SAMPLE_ULAW] = 1,
- [PA_SAMPLE_ALAW] = 1,
- [PA_SAMPLE_S16LE] = 2,
- [PA_SAMPLE_S16BE] = 2,
- [PA_SAMPLE_FLOAT32LE] = 4,
- [PA_SAMPLE_FLOAT32BE] = 4,
- [PA_SAMPLE_S32LE] = 4,
- [PA_SAMPLE_S32BE] = 4,
- [PA_SAMPLE_S24LE] = 3,
- [PA_SAMPLE_S24BE] = 3,
- [PA_SAMPLE_S24_32LE] = 4,
- [PA_SAMPLE_S24_32BE] = 4
- };
+static const size_t size_table[] = { /* per-format sample size (presumably in bytes), indexed by pa_sample_format_t; file scope so the helpers in this file can index it directly */
+ [PA_SAMPLE_U8] = 1,
+ [PA_SAMPLE_ULAW] = 1,
+ [PA_SAMPLE_ALAW] = 1,
+ [PA_SAMPLE_S16LE] = 2,
+ [PA_SAMPLE_S16BE] = 2,
+ [PA_SAMPLE_FLOAT32LE] = 4,
+ [PA_SAMPLE_FLOAT32BE] = 4,
+ [PA_SAMPLE_S32LE] = 4,
+ [PA_SAMPLE_S32BE] = 4,
+ [PA_SAMPLE_S24LE] = 3, /* packed 24-bit formats take 3 */
+ [PA_SAMPLE_S24BE] = 3,
+ [PA_SAMPLE_S24_32LE] = 4, /* the S24_32 formats take 4 */
+ [PA_SAMPLE_S24_32BE] = 4
+};
+size_t pa_sample_size_of_format(pa_sample_format_t f) {
pa_assert(f >= 0);
pa_assert(f < PA_SAMPLE_MAX);
- return table[f];
+ return size_table[f];
}
size_t pa_sample_size(const pa_sample_spec *spec) {
@@ -65,35 +64,35 @@ size_t pa_sample_size(const pa_sample_spec *spec) {
pa_assert(spec);
pa_return_val_if_fail(pa_sample_spec_valid(spec), 0);
- return pa_sample_size_of_format(spec->format);
+ return size_table[spec->format];
}
size_t pa_frame_size(const pa_sample_spec *spec) {
pa_assert(spec);
pa_return_val_if_fail(pa_sample_spec_valid(spec), 0);
- return pa_sample_size(spec) * spec->channels;
+ return size_table[spec->format] * spec->channels;
}
size_t pa_bytes_per_second(const pa_sample_spec *spec) {
pa_assert(spec);
pa_return_val_if_fail(pa_sample_spec_valid(spec), 0);
- return spec->rate*pa_frame_size(spec);
+ return spec->rate * size_table[spec->format] * spec->channels;
}
pa_usec_t pa_bytes_to_usec(uint64_t length, const pa_sample_spec *spec) {
pa_assert(spec);
pa_return_val_if_fail(pa_sample_spec_valid(spec), 0);
- return (((pa_usec_t) (length / pa_frame_size(spec)) * PA_USEC_PER_SEC) / spec->rate);
+ return (((pa_usec_t) (length / (size_table[spec->format] * spec->channels)) * PA_USEC_PER_SEC) / spec->rate);
}
size_t pa_usec_to_bytes(pa_usec_t t, const pa_sample_spec *spec) {
pa_assert(spec);
pa_return_val_if_fail(pa_sample_spec_valid(spec), 0);
- return (size_t) (((t * spec->rate) / PA_USEC_PER_SEC)) * pa_frame_size(spec);
+ return (size_t) (((t * spec->rate) / PA_USEC_PER_SEC)) * (size_table[spec->format] * spec->channels);
}
pa_sample_spec* pa_sample_spec_init(pa_sample_spec *spec) {
@@ -109,12 +108,12 @@ pa_sample_spec* pa_sample_spec_init(pa_sample_spec *spec) {
int pa_sample_spec_valid(const pa_sample_spec *spec) {
pa_assert(spec);
- if (spec->rate <= 0 ||
+ if (PA_UNLIKELY (spec->rate <= 0 ||
spec->rate > PA_RATE_MAX ||
spec->channels <= 0 ||
spec->channels > PA_CHANNELS_MAX ||
spec->format >= PA_SAMPLE_MAX ||
- spec->format < 0)
+ spec->format < 0))
return 0;
return 1;
commit 370016c0e73236830513a9ea9c16366c15bd30a2
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 18:52:28 2009 +0200
svolume: fix compilation in 32bits
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index fb4c82c..86af76d 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -114,11 +114,11 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" je 2f \n\t"
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %4 \n\t" /* .. | p0 | */
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
" movd %4, %%mm1 \n\t"
VOLUME_32x16 (%%mm1, %%mm0)
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
- " movw %4, (%0) \n\t"
+ " movw %w4, (%0) \n\t"
" add $2, %0 \n\t"
MOD_ADD ($1, %5)
@@ -184,13 +184,13 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" je 2f \n\t"
" movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %4 \n\t" /* .. | p0 | */
- " rorw $8, %4 \n\t"
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
+ " rorw $8, %w4 \n\t"
" movd %4, %%mm1 \n\t"
VOLUME_32x16 (%%mm1, %%mm0)
" movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
- " rorw $8, %4 \n\t"
- " movw %4, (%0) \n\t"
+ " rorw $8, %w4 \n\t"
+ " movw %w4, (%0) \n\t"
" add $2, %0 \n\t"
MOD_ADD ($1, %5)
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index 141c466..0054d30 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -91,11 +91,11 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" je 2f \n\t"
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %4 \n\t" /* .. | p0 | */
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
" movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0)
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
- " movw %4, (%0) \n\t"
+ " movw %w4, (%0) \n\t"
" add $2, %0 \n\t"
MOD_ADD ($1, %5)
@@ -168,13 +168,13 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" je 2f \n\t"
" movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %4 \n\t" /* .. | p0 | */
- " rorw $8, %4 \n\t"
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
+ " rorw $8, %w4 \n\t"
" movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0)
" movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
- " rorw $8, %4 \n\t"
- " movw %4, (%0) \n\t"
+ " rorw $8, %w4 \n\t"
+ " movw %w4, (%0) \n\t"
" add $2, %0 \n\t"
MOD_ADD ($1, %5)
commit 078bde1b49a11f6c76e47fea19f9d920a45ce3f1
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 19:50:42 2009 +0200
x86: keep the cpu flags local
diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c
index 2da31c9..453ecf5 100644
--- a/src/pulsecore/cpu-x86.c
+++ b/src/pulsecore/cpu-x86.c
@@ -45,12 +45,11 @@ get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
}
#endif
-static pa_cpu_x86_flag_t pa_cpu_x86_flags;
-
void pa_cpu_init_x86 (void) {
#if defined (__i386__) || defined (__amd64__)
uint32_t eax, ebx, ecx, edx;
uint32_t level;
+ pa_cpu_x86_flag_t flags = 0;
/* get standard level */
get_cpuid (0x00000000, &level, &ebx, &ecx, &edx);
@@ -58,25 +57,25 @@ void pa_cpu_init_x86 (void) {
get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
if (edx & (1<<23))
- pa_cpu_x86_flags |= PA_CPU_X86_MMX;
+ flags |= PA_CPU_X86_MMX;
if (edx & (1<<25))
- pa_cpu_x86_flags |= PA_CPU_X86_SSE;
+ flags |= PA_CPU_X86_SSE;
if (edx & (1<<26))
- pa_cpu_x86_flags |= PA_CPU_X86_SSE2;
+ flags |= PA_CPU_X86_SSE2;
if (ecx & (1<<0))
- pa_cpu_x86_flags |= PA_CPU_X86_SSE3;
+ flags |= PA_CPU_X86_SSE3;
if (ecx & (1<<9))
- pa_cpu_x86_flags |= PA_CPU_X86_SSSE3;
+ flags |= PA_CPU_X86_SSSE3;
if (ecx & (1<<19))
- pa_cpu_x86_flags |= PA_CPU_X86_SSE4_1;
+ flags |= PA_CPU_X86_SSE4_1;
if (ecx & (1<<20))
- pa_cpu_x86_flags |= PA_CPU_X86_SSE4_2;
+ flags |= PA_CPU_X86_SSE4_2;
}
/* get extended level */
@@ -85,38 +84,36 @@ void pa_cpu_init_x86 (void) {
get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
if (edx & (1<<22))
- pa_cpu_x86_flags |= PA_CPU_X86_MMXEXT;
+ flags |= PA_CPU_X86_MMXEXT;
if (edx & (1<<23))
- pa_cpu_x86_flags |= PA_CPU_X86_MMX;
+ flags |= PA_CPU_X86_MMX;
if (edx & (1<<30))
- pa_cpu_x86_flags |= PA_CPU_X86_3DNOWEXT;
+ flags |= PA_CPU_X86_3DNOWEXT;
if (edx & (1<<31))
- pa_cpu_x86_flags |= PA_CPU_X86_3DNOW;
+ flags |= PA_CPU_X86_3DNOW;
}
pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
- (pa_cpu_x86_flags & PA_CPU_X86_MMX) ? "MMX " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_SSE) ? "SSE " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
- (pa_cpu_x86_flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
+ (flags & PA_CPU_X86_MMX) ? "MMX " : "",
+ (flags & PA_CPU_X86_SSE) ? "SSE " : "",
+ (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
+ (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
+ (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
+ (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
+ (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
+ (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
+ (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
+ (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
/* activate various optimisations */
- if (pa_cpu_x86_flags & PA_CPU_X86_MMX) {
- pa_volume_func_init_mmx (pa_cpu_x86_flags);
+ if (flags & PA_CPU_X86_MMX) {
+ pa_volume_func_init_mmx (flags);
}
- if (pa_cpu_x86_flags & PA_CPU_X86_SSE) {
- pa_volume_func_init_sse (pa_cpu_x86_flags);
+ if (flags & PA_CPU_X86_SSE) {
+ pa_volume_func_init_sse (flags);
}
-#else
- pa_cpu_x86_flags = 0;
-#endif
+#endif /* defined (__i386__) || defined (__amd64__) */
}
commit 8aa86f5247103432faf660cba33f5ce80fbbc2c7
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 19:51:11 2009 +0200
arm: implement ARM cpu detection
diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c
index 75646fe..93ad389 100644
--- a/src/pulsecore/cpu-arm.c
+++ b/src/pulsecore/cpu-arm.c
@@ -25,19 +25,116 @@
#endif
#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <pulse/xmalloc.h>
#include <pulsecore/log.h>
#include "cpu-arm.h"
-static pa_cpu_arm_flag_t pa_cpu_arm_flags;
+#if defined (__arm__) && defined (__linux__)
+
+#define MAX_BUFFER 4096
+static char * /* Returns a newly allocated copy of the text after the colon on the first line containing tag, or NULL. */
+get_cpuinfo_line (char *cpuinfo, const char *tag) {
+ char *line, *end, *colon;
+ /* First occurrence of tag anywhere in the buffer; the match is not anchored to a line start. */
+ if (!(line = strstr (cpuinfo, tag)))
+ return NULL;
+ /* The entry must be terminated by a newline. */
+ if (!(end = strchr (line, '\n')))
+ return NULL;
+ /* First colon at or after the tag. */
+ if (!(colon = strchr (line, ':')))
+ return NULL;
+ /* Step past the colon; give up if nothing is left before the newline (this also covers a colon found beyond this line). */
+ if (++colon >= end)
+ return NULL;
+ /* Copy everything between the colon and the newline, leading blanks included; the caller owns the result (presumably released with pa_xfree, confirm). */
+ return pa_xstrndup (colon, end - colon);
+}
+
+static char *get_cpuinfo(void) { /* Reads the start of /proc/cpuinfo into a malloc'ed, NUL-terminated buffer; returns NULL on any failure. The caller frees the result. */
+ char *cpuinfo;
+ int n, fd;
+
+ if (!(cpuinfo = malloc(MAX_BUFFER)))
+ return NULL;
+
+ if ((fd = open("/proc/cpuinfo", O_RDONLY)) < 0) {
+ free (cpuinfo);
+ return NULL;
+ }
+ /* A single read() call: at most MAX_BUFFER-1 bytes are kept, so longer content is truncated; one byte is reserved for the terminator. */
+ if ((n = read(fd, cpuinfo, MAX_BUFFER-1)) < 0) {
+ free (cpuinfo);
+ close (fd);
+ return NULL;
+ }
+ cpuinfo[n] = 0; /* terminate so the string functions can be used on the buffer */
+ close (fd);
+
+ return cpuinfo;
+}
+#endif /* defined (__arm__) && defined (__linux__) */
void pa_cpu_init_arm (void) {
#if defined (__arm__)
- pa_cpu_arm_flags = 0;
-
- pa_log ("ARM init\n");
+#if defined (__linux__)
+ char *cpuinfo, *line;
+ int arch;
+ pa_cpu_arm_flag_t flags = 0;
+
+ /* We need to read the CPU flags from /proc/cpuinfo because there is no user
+ * space support to get the CPU features. This only works on linux AFAIK. */
+ if (!(cpuinfo = get_cpuinfo ())) {
+ pa_log ("Can't read cpuinfo");
+ return;
+ }
+
+ /* get the CPU architecture */
+ if ((line = get_cpuinfo_line (cpuinfo, "CPU architecture"))) {
+ arch = strtoul (line, NULL, 0);
+ if (arch >= 6)
+ flags |= PA_CPU_ARM_V6;
+ if (arch >= 7)
+ flags |= PA_CPU_ARM_V7;
+
+ free (line);
+ }
+ /* get the CPU features */
+ if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
+ char *state = NULL, *current;
+
+ while ((current = pa_split_spaces (line, &state))) {
+ if (!strcmp (current, "vfp"))
+ flags |= PA_CPU_ARM_VFP;
+ else if (!strcmp (current, "edsp"))
+ flags |= PA_CPU_ARM_EDSP;
+ else if (!strcmp (current, "neon"))
+ flags |= PA_CPU_ARM_NEON;
+ else if (!strcmp (current, "vfpv3"))
+ flags |= PA_CPU_ARM_VFPV3;
+
+ free (current);
+ }
+ }
+ free (cpuinfo);
+
+ pa_log_info ("CPU flags: %s%s%s%s%s%s",
+ (flags & PA_CPU_ARM_V6) ? "V6 " : "",
+ (flags & PA_CPU_ARM_V7) ? "V7 " : "",
+ (flags & PA_CPU_ARM_VFP) ? "VFP " : "",
+ (flags & PA_CPU_ARM_EDSP) ? "EDSP " : "",
+ (flags & PA_CPU_ARM_NEON) ? "NEON " : "",
+ (flags & PA_CPU_ARM_VFPV3) ? "VFPV3 " : "");
+#else /* defined (__linux__) */
+ pa_log ("ARM cpu features not yet supported on this OS");
+#endif /* defined (__linux__) */
- pa_volume_func_init_arm (pa_cpu_arm_flags);
+ if (flags & PA_CPU_ARM_V6)
+ pa_volume_func_init_arm (flags);
#endif /* defined (__arm__) */
}
diff --git a/src/pulsecore/cpu-arm.h b/src/pulsecore/cpu-arm.h
index 1a0ac27..3ccd070 100644
--- a/src/pulsecore/cpu-arm.h
+++ b/src/pulsecore/cpu-arm.h
@@ -27,8 +27,11 @@
typedef enum pa_cpu_arm_flag {
PA_CPU_ARM_V6 = (1 << 0),
- PA_CPU_ARM_NEON = (1 << 1),
- PA_CPU_ARM_VFP = (1 << 2)
+ PA_CPU_ARM_V7 = (1 << 1),
+ PA_CPU_ARM_VFP = (1 << 2),
+ PA_CPU_ARM_EDSP = (1 << 3),
+ PA_CPU_ARM_NEON = (1 << 4),
+ PA_CPU_ARM_VFPV3 = (1 << 5)
} pa_cpu_arm_flag_t;
void pa_cpu_init_arm (void);
commit aeae567f8861d2f068ebd0f054cd9d0aa6a7fe95
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 20:00:28 2009 +0200
svolume: add comment
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index 0054d30..5979f7c 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -117,6 +117,9 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" test $1, %2 \n\t"
" je 6f \n\t"
+ /* FIXME, we can do aligned access of the volume values if we can guarantee
+ * that the array is 16 bytes aligned, we probably have to do the odd values
+ * after this then. */
"5: \n\t" /* do samples in groups of 4 */
" movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
" movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
commit 3cc1278dcf44c9fb93bfd2725a2f75de1958cf23
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Wed Aug 19 20:47:48 2009 +0200
resampler: avoid some multiplies when we can
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 2256516..43771dc 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -1122,8 +1122,13 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
d = (float *)dst + oc;
s = (float *)src + ic;
- for (i = n; i > 0; i--, s += n_ic, d += n_oc)
- *d += *s * vol;
+ if (vol >= 1.0) {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s;
+ } else {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s * vol;
+ }
}
}
commit f09b51198f43d79b22cb92b5223d01a7ab339d9f
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 10:56:20 2009 +0200
whitespace fixes
diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c
index 93ad389..5a994b7 100644
--- a/src/pulsecore/cpu-arm.c
+++ b/src/pulsecore/cpu-arm.c
@@ -36,14 +36,14 @@
#if defined (__arm__) && defined (__linux__)
-#define MAX_BUFFER 4096
+#define MAX_BUFFER 4096
static char *
get_cpuinfo_line (char *cpuinfo, const char *tag) {
char *line, *end, *colon;
if (!(line = strstr (cpuinfo, tag)))
return NULL;
-
+
if (!(end = strchr (line, '\n')))
return NULL;
@@ -106,20 +106,20 @@ void pa_cpu_init_arm (void) {
}
/* get the CPU features */
if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
- char *state = NULL, *current;
-
- while ((current = pa_split_spaces (line, &state))) {
- if (!strcmp (current, "vfp"))
- flags |= PA_CPU_ARM_VFP;
- else if (!strcmp (current, "edsp"))
- flags |= PA_CPU_ARM_EDSP;
- else if (!strcmp (current, "neon"))
- flags |= PA_CPU_ARM_NEON;
- else if (!strcmp (current, "vfpv3"))
- flags |= PA_CPU_ARM_VFPV3;
-
- free (current);
- }
+ char *state = NULL, *current;
+
+ while ((current = pa_split_spaces (line, &state))) {
+ if (!strcmp (current, "vfp"))
+ flags |= PA_CPU_ARM_VFP;
+ else if (!strcmp (current, "edsp"))
+ flags |= PA_CPU_ARM_EDSP;
+ else if (!strcmp (current, "neon"))
+ flags |= PA_CPU_ARM_NEON;
+ else if (!strcmp (current, "vfpv3"))
+ flags |= PA_CPU_ARM_VFPV3;
+
+ free (current);
+ }
}
free (cpuinfo);
diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c
index 453ecf5..0457199 100644
--- a/src/pulsecore/cpu-x86.c
+++ b/src/pulsecore/cpu-x86.c
@@ -2,7 +2,7 @@
This file is part of PulseAudio.
Copyright 2004-2006 Lennart Poettering
- Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
PulseAudio is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
@@ -34,14 +34,15 @@
static void
get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
- __asm__ __volatile__ (
- " push %%"PA_REG_b" \n\t"
- " cpuid \n\t"
- " mov %%ebx, %%esi \n\t"
- " pop %%"PA_REG_b" \n\t"
-
- : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
- : "0" (op));
+ __asm__ __volatile__ (
+ " push %%"PA_REG_b" \n\t"
+ " cpuid \n\t"
+ " mov %%ebx, %%esi \n\t"
+ " pop %%"PA_REG_b" \n\t"
+
+ : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
+ : "0" (op)
+ );
}
#endif
@@ -97,23 +98,23 @@ void pa_cpu_init_x86 (void) {
}
pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
- (flags & PA_CPU_X86_MMX) ? "MMX " : "",
- (flags & PA_CPU_X86_SSE) ? "SSE " : "",
- (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
- (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
- (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
- (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
- (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
- (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
- (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
- (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
+ (flags & PA_CPU_X86_MMX) ? "MMX " : "",
+ (flags & PA_CPU_X86_SSE) ? "SSE " : "",
+ (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
+ (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
+ (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
+ (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
+ (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
+ (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
+ (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
+ (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
/* activate various optimisations */
- if (flags & PA_CPU_X86_MMX) {
+ if (flags & PA_CPU_X86_MMX)
pa_volume_func_init_mmx (flags);
- }
- if (flags & PA_CPU_X86_SSE) {
- pa_volume_func_init_sse (flags);
- }
+
+ if (flags & PA_CPU_X86_SSE)
+ pa_volume_func_init_sse (flags);
+
#endif /* defined (__i386__) || defined (__amd64__) */
}
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 43771dc..5a6c398 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -1065,30 +1065,53 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
}
static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) {
-
+ unsigned i;
+
switch (r->work_format) {
case PA_SAMPLE_FLOAT32NE:
{
float *d, *s;
- d = (float *) dst;
- s = (float *) src;
+ d = (float *) dst;
+ s = (float *) src;
- for (; n > 0; n--, s++, d += 2)
- d[0] = d[1] = *s;
- break;
- }
+ for (i = n >> 2; i; i--) {
+ d[0] = d[1] = s[0];
+ d[2] = d[3] = s[1];
+ d[4] = d[5] = s[2];
+ d[6] = d[7] = s[3];
+ s += 4;
+ d += 8;
+ }
+ for (i = n & 3; i; i--) {
+ d[0] = d[1] = s[0];
+ s++;
+ d += 2;
+ }
+ break;
+ }
case PA_SAMPLE_S16NE:
{
int16_t *d, *s;
- d = (int16_t *) dst;
- s = (int16_t *) src;
+ d = (int16_t *) dst;
+ s = (int16_t *) src;
- for (; n > 0; n--, s++, d += 2)
- d[0] = d[1] = *s;
- break;
- }
+ for (i = n >> 2; i; i--) {
+ d[0] = d[1] = s[0];
+ d[2] = d[3] = s[1];
+ d[4] = d[5] = s[2];
+ d[6] = d[7] = s[3];
+ s += 4;
+ d += 8;
+ }
+ for (i = n & 3; i; i--) {
+ d[0] = d[1] = s[0];
+ s++;
+ d += 2;
+ }
+ break;
+ }
default:
pa_assert_not_reached();
}
@@ -1114,7 +1137,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
for (ic = 0; ic < n_ic; ic++) {
float vol;
- vol = r->map_table_f[oc][ic];
+ vol = r->map_table_f[oc][ic];
if (vol <= 0.0)
continue;
@@ -1122,18 +1145,18 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
d = (float *)dst + oc;
s = (float *)src + ic;
- if (vol >= 1.0) {
+ if (vol >= 1.0) {
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += *s;
- } else {
+ } else {
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += *s * vol;
- }
+ }
}
}
break;
- }
+ }
case PA_SAMPLE_S16NE:
{
int16_t *d, *s;
@@ -1144,7 +1167,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
for (ic = 0; ic < n_ic; ic++) {
int32_t vol;
- vol = r->map_table_i[oc][ic];
+ vol = r->map_table_i[oc][ic];
if (vol <= 0)
continue;
@@ -1158,11 +1181,11 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
} else {
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += (int16_t) (((int32_t)*s * vol) >> 16);
- }
+ }
}
}
break;
- }
+ }
default:
pa_assert_not_reached();
}
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index 677f914..6e97e5a 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -752,12 +752,13 @@ void pa_volume_memchunk(
return;
}
- ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
-
do_volume = pa_get_volume_func (spec->format);
pa_assert(do_volume);
-
+
calc_volume_table[spec->format] ((void *)linear, volume);
+
+ ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
+
do_volume (ptr, (void *)linear, spec->channels, c->length);
pa_memblock_release(c->memblock);
@@ -944,12 +945,12 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo
for (; n > 0; n--) {
float f;
- f = *s;
+ f = *s;
*d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
s = (const float*) ((const uint8_t*) s + sstr);
d = (float*) ((uint8_t*) d + dstr);
- }
+ }
} else {
pa_assert(format == PA_SAMPLE_FLOAT32RE);
diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c
index 7e25a13..0d39d10 100644
--- a/src/pulsecore/svolume_arm.c
+++ b/src/pulsecore/svolume_arm.c
@@ -40,86 +40,86 @@
#define MOD_INC() \
" subs r0, r6, %2 \n\t" \
" addcs r0, %1 \n\t" \
- " movcs r6, r0 \n\t"
+ " movcs r6, r0 \n\t"
static void
pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- int32_t *ve;
-
- channels = MAX (4, channels);
- ve = volumes + channels;
-
- __asm__ __volatile__ (
- " mov r6, %1 \n\t"
- " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
- " tst %3, #1 \n\t" /* check for odd samples */
- " beq 2f \n\t"
-
- "1: \n\t"
- " ldr r0, [r6], #4 \n\t" /* odd samples volumes */
- " ldrh r2, [%0] \n\t"
-
- " smulwb r0, r0, r2 \n\t"
- " ssat r0, #16, r0 \n\t"
-
- " strh r0, [%0], #2 \n\t"
-
- MOD_INC()
-
- "2: \n\t"
- " mov %3, %3, LSR #1 \n\t"
- " tst %3, #1 \n\t" /* check for odd samples */
- " beq 4f \n\t"
-
- "3: \n\t"
- " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
- " ldr r0, [%0] \n\t"
-
- " smulwt r2, r2, r0 \n\t"
- " smulwb r3, r3, r0 \n\t"
-
- " ssat r2, #16, r2 \n\t"
- " ssat r3, #16, r3 \n\t"
-
- " pkhbt r0, r3, r2, LSL #16 \n\t"
- " str r0, [%0], #4 \n\t"
-
- MOD_INC()
-
- "4: \n\t"
- " movs %3, %3, LSR #1 \n\t"
- " beq 6f \n\t"
-
- "5: \n\t"
- " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
- " ldrd r4, [r6], #8 \n\t"
- " ldrd r0, [%0] \n\t"
-
- " smulwt r2, r2, r0 \n\t"
- " smulwb r3, r3, r0 \n\t"
- " smulwt r4, r4, r1 \n\t"
- " smulwb r5, r5, r1 \n\t"
-
- " ssat r2, #16, r2 \n\t"
- " ssat r3, #16, r3 \n\t"
- " ssat r4, #16, r4 \n\t"
- " ssat r5, #16, r5 \n\t"
-
- " pkhbt r0, r3, r2, LSL #16 \n\t"
- " pkhbt r1, r5, r4, LSL #16 \n\t"
- " strd r0, [%0], #8 \n\t"
-
- MOD_INC()
-
- " subs %3, %3, #1 \n\t"
- " bne 5b \n\t"
- "6: \n\t"
-
- : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
- :
- : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
- );
+ int32_t *ve;
+
+ channels = MAX (4, channels);
+ ve = volumes + channels;
+
+ __asm__ __volatile__ (
+ " mov r6, %1 \n\t"
+ " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
+ " tst %3, #1 \n\t" /* check for odd samples */
+ " beq 2f \n\t"
+
+ "1: \n\t"
+ " ldr r0, [r6], #4 \n\t" /* odd samples volumes */
+ " ldrh r2, [%0] \n\t"
+
+ " smulwb r0, r0, r2 \n\t"
+ " ssat r0, #16, r0 \n\t"
+
+ " strh r0, [%0], #2 \n\t"
+
+ MOD_INC()
+
+ "2: \n\t"
+ " mov %3, %3, LSR #1 \n\t"
+ " tst %3, #1 \n\t" /* check for odd samples */
+ " beq 4f \n\t"
+
+ "3: \n\t"
+ " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
+ " ldr r0, [%0] \n\t"
+
+ " smulwt r2, r2, r0 \n\t"
+ " smulwb r3, r3, r0 \n\t"
+
+ " ssat r2, #16, r2 \n\t"
+ " ssat r3, #16, r3 \n\t"
+
+ " pkhbt r0, r3, r2, LSL #16 \n\t"
+ " str r0, [%0], #4 \n\t"
+
+ MOD_INC()
+
+ "4: \n\t"
+ " movs %3, %3, LSR #1 \n\t"
+ " beq 6f \n\t"
+
+ "5: \n\t"
+ " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
+ " ldrd r4, [r6], #8 \n\t"
+ " ldrd r0, [%0] \n\t"
+
+ " smulwt r2, r2, r0 \n\t"
+ " smulwb r3, r3, r0 \n\t"
+ " smulwt r4, r4, r1 \n\t"
+ " smulwb r5, r5, r1 \n\t"
+
+ " ssat r2, #16, r2 \n\t"
+ " ssat r3, #16, r3 \n\t"
+ " ssat r4, #16, r4 \n\t"
+ " ssat r5, #16, r5 \n\t"
+
+ " pkhbt r0, r3, r2, LSL #16 \n\t"
+ " pkhbt r1, r5, r4, LSL #16 \n\t"
+ " strd r0, [%0], #8 \n\t"
+
+ MOD_INC()
+
+ " subs %3, %3, #1 \n\t"
+ " bne 5b \n\t"
+ "6: \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
+ :
+ : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
+ );
}
#undef RUN_TEST
@@ -131,51 +131,51 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#define PADDING 16
static void run_test (void) {
- int16_t samples[SAMPLES];
- int16_t samples_ref[SAMPLES];
- int16_t samples_orig[SAMPLES];
- int32_t volumes[CHANNELS + PADDING];
- int i, j, padding;
- pa_do_volume_func_t func;
- struct timeval start, stop;
-
- func = pa_get_volume_func (PA_SAMPLE_S16NE);
-
- printf ("checking ARM %zd\n", sizeof (samples));
-
- pa_random (samples, sizeof (samples));
- memcpy (samples_ref, samples, sizeof (samples));
- memcpy (samples_orig, samples, sizeof (samples));
-
- for (i = 0; i < CHANNELS; i++)
- volumes[i] = rand() >> 1;
- for (padding = 0; padding < PADDING; padding++, i++)
- volumes[i] = volumes[padding];
-
- func (samples_ref, volumes, CHANNELS, sizeof (samples));
- pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
- for (i = 0; i < SAMPLES; i++) {
- if (samples[i] != samples_ref[i]) {
- printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
- samples_orig[i], volumes[i % CHANNELS]);
- }
- }
+ int16_t samples[SAMPLES];
+ int16_t samples_ref[SAMPLES];
+ int16_t samples_orig[SAMPLES];
+ int32_t volumes[CHANNELS + PADDING];
+ int i, j, padding;
+ pa_do_volume_func_t func;
+ struct timeval start, stop;
- pa_gettimeofday(&start);
- for (j = 0; j < TIMES; j++) {
- memcpy (samples, samples_orig, sizeof (samples));
- pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
- }
- pa_gettimeofday(&stop);
- pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+ printf ("checking ARM %zd\n", sizeof (samples));
+
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
+
+ for (i = 0; i < CHANNELS; i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
+ volumes[i] = volumes[padding];
- pa_gettimeofday(&start);
- for (j = 0; j < TIMES; j++) {
- memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
- }
- pa_gettimeofday(&stop);
- pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+ }
+ }
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples, samples_orig, sizeof (samples));
+ pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples_ref, samples_orig, sizeof (samples));
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif
@@ -184,12 +184,12 @@ static void run_test (void) {
void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) {
#if defined (__arm__)
- pa_log_info("Initialising ARM optimized functions.");
+ pa_log_info("Initialising ARM optimized functions.");
#ifdef RUN_TEST
- run_test ();
+ run_test ();
#endif
- pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
+ pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm);
#endif /* defined (__arm__) */
}
diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c
index 2148a57..5fc052b 100644
--- a/src/pulsecore/svolume_c.c
+++ b/src/pulsecore/svolume_c.c
@@ -35,289 +35,289 @@
static void
pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- t = (int32_t) *samples - 0x80;
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
- *samples++ = (uint8_t) (t + 0x80);
+ t = (int32_t) *samples - 0x80;
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
+ *samples++ = (uint8_t) (t + 0x80);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- t = (int32_t) st_alaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
+ t = (int32_t) st_alaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- t = (int32_t) st_ulaw2linear16(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
+ t = (int32_t) st_ulaw2linear16(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (int16_t);
+ length /= sizeof (int16_t);
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- /* Multiplying the 32bit volume factor with the 16bit
- * sample might result in an 48bit value. We want to
- * do without 64 bit integers and hence do the
- * multiplication independantly for the HI and LO part
- * of the volume. */
+ /* Multiplying the 32bit volume factor with the 16bit
+ * sample might result in a 48bit value. We want to
+ * do without 64 bit integers and hence do the
+ * multiplication independently for the HI and LO part
+ * of the volume. */
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- t = (int32_t)(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = (int16_t) t;
+ t = (int32_t)(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = (int16_t) t;
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (int16_t);
+ length /= sizeof (int16_t);
- for (channel = 0; length; length--) {
- int32_t t, hi, lo;
+ for (channel = 0; length; length--) {
+ int32_t t, hi, lo;
- hi = volumes[channel] >> 16;
- lo = volumes[channel] & 0xFFFF;
+ hi = volumes[channel] >> 16;
+ lo = volumes[channel] & 0xFFFF;
- t = (int32_t) PA_INT16_SWAP(*samples);
- t = ((t * lo) >> 16) + (t * hi);
- t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
- *samples++ = PA_INT16_SWAP((int16_t) t);
+ t = (int32_t) PA_INT16_SWAP(*samples);
+ t = ((t * lo) >> 16) + (t * hi);
+ t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+ *samples++ = PA_INT16_SWAP((int16_t) t);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (float);
+ length /= sizeof (float);
- for (channel = 0; length; length--) {
- *samples++ *= volumes[channel];
+ for (channel = 0; length; length--) {
+ *samples++ *= volumes[channel];
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (float);
+ length /= sizeof (float);
- for (channel = 0; length; length--) {
- float t;
+ for (channel = 0; length; length--) {
+ float t;
- t = PA_FLOAT32_SWAP(*samples);
- t *= volumes[channel];
- *samples++ = PA_FLOAT32_SWAP(t);
+ t = PA_FLOAT32_SWAP(*samples);
+ t *= volumes[channel];
+ *samples++ = PA_FLOAT32_SWAP(t);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (int32_t);
+ length /= sizeof (int32_t);
- for (channel = 0; length; length--) {
- int64_t t;
+ for (channel = 0; length; length--) {
+ int64_t t;
- t = (int64_t)(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = (int32_t) t;
+ t = (int64_t)(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = (int32_t) t;
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (int32_t);
+ length /= sizeof (int32_t);
- for (channel = 0; length; length--) {
- int64_t t;
+ for (channel = 0; length; length--) {
+ int64_t t;
- t = (int64_t) PA_INT32_SWAP(*samples);
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_INT32_SWAP((int32_t) t);
+ t = (int64_t) PA_INT32_SWAP(*samples);
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_INT32_SWAP((int32_t) t);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
- uint8_t *e;
+ unsigned channel;
+ uint8_t *e;
- e = samples + length;
+ e = samples + length;
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
- t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
+ t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
- uint8_t *e;
+ unsigned channel;
+ uint8_t *e;
- e = samples + length;
+ e = samples + length;
- for (channel = 0; samples < e; samples += 3) {
- int64_t t;
+ for (channel = 0; samples < e; samples += 3) {
+ int64_t t;
- t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
+ t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (uint32_t);
+ length /= sizeof (uint32_t);
- for (channel = 0; length; length--) {
- int64_t t;
+ for (channel = 0; length; length--) {
+ int64_t t;
- t = (int64_t) ((int32_t) (*samples << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
+ t = (int64_t) ((int32_t) (*samples << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = ((uint32_t) ((int32_t) t)) >> 8;
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static void
pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- unsigned channel;
+ unsigned channel;
- length /= sizeof (uint32_t);
+ length /= sizeof (uint32_t);
- for (channel = 0; length; length--) {
- int64_t t;
+ for (channel = 0; length; length--) {
+ int64_t t;
- t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
- t = (t * volumes[channel]) >> 16;
- t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
- *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
+ t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8));
+ t = (t * volumes[channel]) >> 16;
+ t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+ *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
- if (PA_UNLIKELY(++channel >= channels))
- channel = 0;
- }
+ if (PA_UNLIKELY(++channel >= channels))
+ channel = 0;
+ }
}
static pa_do_volume_func_t do_volume_table[] =
{
- [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
- [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
- [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
- [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
- [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
- [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
- [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
- [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
- [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
- [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
- [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
- [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
- [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
+ [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c,
+ [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c,
+ [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c,
+ [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c,
+ [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c,
+ [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
+ [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
+ [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c,
+ [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c,
+ [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c,
+ [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c,
+ [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
+ [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c
};
pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) {
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index 86af76d..7e24268 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -73,7 +73,7 @@
" add "#a", %3 \n\t" \
" mov %3, %4 \n\t" \
" sub "#b", %4 \n\t" \
- " cmovae %4, %3 \n\t"
+ " cmovae %4, %3 \n\t"
/* swap 16 bits */
#define SWAP_16(s) \
@@ -96,147 +96,147 @@
static void
pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- pa_reg_x86 channel, temp;
-
- /* the max number of samples we process at a time, this is also the max amount
- * we overread the volume array, which should have enough padding. */
- channels = MAX (4, channels);
-
- __asm__ __volatile__ (
- " xor %3, %3 \n\t"
- " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
- " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
- " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
- " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
- " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
-
- " test $1, %2 \n\t" /* check for odd samples */
- " je 2f \n\t"
-
- " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %w4 \n\t" /* .. | p0 | */
- " movd %4, %%mm1 \n\t"
- VOLUME_32x16 (%%mm1, %%mm0)
- " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
- " movw %w4, (%0) \n\t"
- " add $2, %0 \n\t"
- MOD_ADD ($1, %5)
-
- "2: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
- " test $1, %2 \n\t" /* check for odd samples */
- " je 4f \n\t"
-
- "3: \n\t" /* do samples in groups of 2 */
- " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
- " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
- VOLUME_32x16 (%%mm1, %%mm0)
- " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
- " add $4, %0 \n\t"
- MOD_ADD ($2, %5)
-
- "4: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
- " cmp $0, %2 \n\t"
- " je 6f \n\t"
-
- "5: \n\t" /* do samples in groups of 4 */
- " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
- " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
- " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
- " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
- VOLUME_32x16 (%%mm1, %%mm0)
- VOLUME_32x16 (%%mm3, %%mm2)
- " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
- " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
- " add $8, %0 \n\t"
- MOD_ADD ($4, %5)
- " dec %2 \n\t"
- " jne 5b \n\t"
-
- "6: \n\t"
- " emms \n\t"
-
- : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
- : "r" ((pa_reg_x86)channels)
- : "cc"
- );
+ pa_reg_x86 channel, temp;
+
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
+ channels = MAX (4, channels);
+
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+ " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
+ " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
+ " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
+ " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
+
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
+
+ " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
+ " movd %4, %%mm1 \n\t"
+ VOLUME_32x16 (%%mm1, %%mm0)
+ " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
+ " movw %w4, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
+
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
+ VOLUME_32x16 (%%mm1, %%mm0)
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 6f \n\t"
+
+ "5: \n\t" /* do samples in groups of 4 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
+ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
+ VOLUME_32x16 (%%mm1, %%mm0)
+ VOLUME_32x16 (%%mm3, %%mm2)
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+ " dec %2 \n\t"
+ " jne 5b \n\t"
+
+ "6: \n\t"
+ " emms \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
+ : "r" ((pa_reg_x86)channels)
+ : "cc"
+ );
}
static void
pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- pa_reg_x86 channel, temp;
-
- /* the max number of samples we process at a time, this is also the max amount
- * we overread the volume array, which should have enough padding. */
- channels = MAX (4, channels);
-
- __asm__ __volatile__ (
- " xor %3, %3 \n\t"
- " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
- " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
- " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
- " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
- " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
-
- " test $1, %2 \n\t" /* check for odd samples */
- " je 2f \n\t"
-
- " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %w4 \n\t" /* .. | p0 | */
- " rorw $8, %w4 \n\t"
- " movd %4, %%mm1 \n\t"
- VOLUME_32x16 (%%mm1, %%mm0)
- " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
- " rorw $8, %w4 \n\t"
- " movw %w4, (%0) \n\t"
- " add $2, %0 \n\t"
- MOD_ADD ($1, %5)
-
- "2: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
- " test $1, %2 \n\t" /* check for odd samples */
- " je 4f \n\t"
-
- "3: \n\t" /* do samples in groups of 2 */
- " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
- " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
- SWAP_16 (%%mm1)
- VOLUME_32x16 (%%mm1, %%mm0)
- SWAP_16 (%%mm0)
- " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
- " add $4, %0 \n\t"
- MOD_ADD ($2, %5)
-
- "4: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
- " cmp $0, %2 \n\t"
- " je 6f \n\t"
-
- "5: \n\t" /* do samples in groups of 4 */
- " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
- " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
- " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
- " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
- SWAP_16_2 (%%mm1, %%mm3)
- VOLUME_32x16 (%%mm1, %%mm0)
- VOLUME_32x16 (%%mm3, %%mm2)
- SWAP_16_2 (%%mm0, %%mm2)
- " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
- " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
- " add $8, %0 \n\t"
- MOD_ADD ($4, %5)
- " dec %2 \n\t"
- " jne 5b \n\t"
-
- "6: \n\t"
- " emms \n\t"
-
- : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
- : "r" ((pa_reg_x86)channels)
- : "cc"
- );
+ pa_reg_x86 channel, temp;
+
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
+ channels = MAX (4, channels);
+
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+ " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
+ " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
+ " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
+ " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
+
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
+
+ " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
+ " rorw $8, %w4 \n\t"
+ " movd %4, %%mm1 \n\t"
+ VOLUME_32x16 (%%mm1, %%mm0)
+ " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */
+ " rorw $8, %w4 \n\t"
+ " movw %w4, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
+
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
+ SWAP_16 (%%mm1)
+ VOLUME_32x16 (%%mm1, %%mm0)
+ SWAP_16 (%%mm0)
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 6f \n\t"
+
+ "5: \n\t" /* do samples in groups of 4 */
+ " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */
+ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */
+ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */
+ SWAP_16_2 (%%mm1, %%mm3)
+ VOLUME_32x16 (%%mm1, %%mm0)
+ VOLUME_32x16 (%%mm3, %%mm2)
+ SWAP_16_2 (%%mm0, %%mm2)
+ " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+ " dec %2 \n\t"
+ " jne 5b \n\t"
+
+ "6: \n\t"
+ " emms \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
+ : "r" ((pa_reg_x86)channels)
+ : "cc"
+ );
}
#undef RUN_TEST
@@ -248,51 +248,51 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#define PADDING 16
static void run_test (void) {
- int16_t samples[SAMPLES];
- int16_t samples_ref[SAMPLES];
- int16_t samples_orig[SAMPLES];
- int32_t volumes[CHANNELS + PADDING];
- int i, j, padding;
- pa_do_volume_func_t func;
- struct timeval start, stop;
-
- func = pa_get_volume_func (PA_SAMPLE_S16NE);
-
- printf ("checking MMX %zd\n", sizeof (samples));
-
- pa_random (samples, sizeof (samples));
- memcpy (samples_ref, samples, sizeof (samples));
- memcpy (samples_orig, samples, sizeof (samples));
-
- for (i = 0; i < CHANNELS; i++)
- volumes[i] = rand() >> 1;
- for (padding = 0; padding < PADDING; padding++, i++)
- volumes[i] = volumes[padding];
-
- func (samples_ref, volumes, CHANNELS, sizeof (samples));
- pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
- for (i = 0; i < SAMPLES; i++) {
- if (samples[i] != samples_ref[i]) {
- printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
- samples_orig[i], volumes[i % CHANNELS]);
- }
- }
+ int16_t samples[SAMPLES];
+ int16_t samples_ref[SAMPLES];
+ int16_t samples_orig[SAMPLES];
+ int32_t volumes[CHANNELS + PADDING];
+ int i, j, padding;
+ pa_do_volume_func_t func;
+ struct timeval start, stop;
- pa_gettimeofday(&start);
- for (j = 0; j < TIMES; j++) {
- memcpy (samples, samples_orig, sizeof (samples));
- pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
- }
- pa_gettimeofday(&stop);
- pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+ printf ("checking MMX %zd\n", sizeof (samples));
+
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
+
+ for (i = 0; i < CHANNELS; i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
+ volumes[i] = volumes[padding];
- pa_gettimeofday(&start);
- for (j = 0; j < TIMES; j++) {
- memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
- }
- pa_gettimeofday(&stop);
- pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+ }
+ }
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples, samples_orig, sizeof (samples));
+ pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples_ref, samples_orig, sizeof (samples));
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif
@@ -301,13 +301,13 @@ static void run_test (void) {
void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
#if defined (__i386__) || defined (__amd64__)
- pa_log_info("Initialising MMX optimized functions.");
+ pa_log_info("Initialising MMX optimized functions.");
#ifdef RUN_TEST
- run_test ();
+ run_test ();
#endif
- pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
- pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
+ pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);
+ pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx);
#endif /* defined (__i386__) || defined (__amd64__) */
}
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index 5979f7c..b5e3687 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -48,7 +48,7 @@
" psrld $16, "#v" \n\t" /* .. | p0 | 0 | */ \
" pmaddwd %%xmm5, "#v" \n\t" /* .. | p0 * vh | */ \
" paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \
- " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */
+ " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */
#define MOD_ADD(a,b) \
" add "#a", %3 \n\t" /* channel += inc */ \
@@ -77,169 +77,169 @@
static void
pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- pa_reg_x86 channel, temp;
-
- /* the max number of samples we process at a time, this is also the max amount
- * we overread the volume array, which should have enough padding. */
- channels = MAX (8, channels);
-
- __asm__ __volatile__ (
- " xor %3, %3 \n\t"
- " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
-
- " test $1, %2 \n\t" /* check for odd samples */
- " je 2f \n\t"
-
- " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %w4 \n\t" /* .. | p0 | */
- " movd %4, %%xmm1 \n\t"
- VOLUME_32x16 (%%xmm1, %%xmm0)
- " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
- " movw %w4, (%0) \n\t"
- " add $2, %0 \n\t"
- MOD_ADD ($1, %5)
-
- "2: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
- " test $1, %2 \n\t"
- " je 4f \n\t"
-
- "3: \n\t" /* do samples in groups of 2 */
- " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
- " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
- VOLUME_32x16 (%%xmm1, %%xmm0)
- " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
- " add $4, %0 \n\t"
- MOD_ADD ($2, %5)
-
- "4: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
- " test $1, %2 \n\t"
- " je 6f \n\t"
-
- /* FIXME, we can do aligned access of the volume values if we can guarantee
- * that the array is 16 bytes aligned, we probably have to do the odd values
- * after this then. */
- "5: \n\t" /* do samples in groups of 4 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
- " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
- VOLUME_32x16 (%%xmm1, %%xmm0)
- " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
- " add $8, %0 \n\t"
- MOD_ADD ($4, %5)
-
- "6: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
- " cmp $0, %2 \n\t"
- " je 8f \n\t"
-
- "7: \n\t" /* do samples in groups of 8 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
- " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
- " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
- " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
- VOLUME_32x16 (%%xmm1, %%xmm0)
- VOLUME_32x16 (%%xmm3, %%xmm2)
- " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
- " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
- " add $16, %0 \n\t"
- MOD_ADD ($8, %5)
- " dec %2 \n\t"
- " jne 7b \n\t"
- "8: \n\t"
-
- : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
- : "r" ((pa_reg_x86)channels)
- : "cc"
- );
+ pa_reg_x86 channel, temp;
+
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
+ channels = MAX (8, channels);
+
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
+
+ " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
+ " movd %4, %%xmm1 \n\t"
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
+ " movw %w4, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
+
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t"
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " test $1, %2 \n\t"
+ " je 6f \n\t"
+
+ /* FIXME, we can do aligned access of the volume values if we can guarantee
+ * that the array is 16 bytes aligned, we probably have to do the odd values
+ * after this then. */
+ "5: \n\t" /* do samples in groups of 4 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+
+ "6: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 8f \n\t"
+
+ "7: \n\t" /* do samples in groups of 8 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ VOLUME_32x16 (%%xmm3, %%xmm2)
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
+ " add $16, %0 \n\t"
+ MOD_ADD ($8, %5)
+ " dec %2 \n\t"
+ " jne 7b \n\t"
+ "8: \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
+ : "r" ((pa_reg_x86)channels)
+ : "cc"
+ );
}
static void
pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
{
- pa_reg_x86 channel, temp;
-
- /* the max number of samples we process at a time, this is also the max amount
- * we overread the volume array, which should have enough padding. */
- channels = MAX (8, channels);
-
- __asm__ __volatile__ (
- " xor %3, %3 \n\t"
- " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
-
- " test $1, %2 \n\t" /* check for odd samples */
- " je 2f \n\t"
-
- " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
- " movw (%0), %w4 \n\t" /* .. | p0 | */
- " rorw $8, %w4 \n\t"
- " movd %4, %%xmm1 \n\t"
- VOLUME_32x16 (%%xmm1, %%xmm0)
- " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
- " rorw $8, %w4 \n\t"
- " movw %w4, (%0) \n\t"
- " add $2, %0 \n\t"
- MOD_ADD ($1, %5)
-
- "2: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
- " test $1, %2 \n\t"
- " je 4f \n\t"
-
- "3: \n\t" /* do samples in groups of 2 */
- " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
- " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
- SWAP_16 (%%xmm1)
- VOLUME_32x16 (%%xmm1, %%xmm0)
- SWAP_16 (%%xmm0)
- " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
- " add $4, %0 \n\t"
- MOD_ADD ($2, %5)
-
- "4: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
- " test $1, %2 \n\t"
- " je 6f \n\t"
-
- /* FIXME, we can do aligned access of the volume values if we can guarantee
- * that the array is 16 bytes aligned, we probably have to do the odd values
- * after this then. */
- "5: \n\t" /* do samples in groups of 4 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
- " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
- SWAP_16 (%%xmm1)
- VOLUME_32x16 (%%xmm1, %%xmm0)
- SWAP_16 (%%xmm0)
- " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
- " add $8, %0 \n\t"
- MOD_ADD ($4, %5)
-
- "6: \n\t"
- " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
- " cmp $0, %2 \n\t"
- " je 8f \n\t"
-
- "7: \n\t" /* do samples in groups of 8 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
- " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
- " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
- " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
- SWAP_16_2 (%%xmm1, %%xmm3)
- VOLUME_32x16 (%%xmm1, %%xmm0)
- VOLUME_32x16 (%%xmm3, %%xmm2)
- SWAP_16_2 (%%xmm0, %%xmm2)
- " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
- " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
- " add $16, %0 \n\t"
- MOD_ADD ($8, %5)
- " dec %2 \n\t"
- " jne 7b \n\t"
- "8: \n\t"
-
- : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
- : "r" ((pa_reg_x86)channels)
- : "cc"
- );
+ pa_reg_x86 channel, temp;
+
+ /* the max number of samples we process at a time, this is also the max amount
+ * we overread the volume array, which should have enough padding. */
+ channels = MAX (8, channels);
+
+ __asm__ __volatile__ (
+ " xor %3, %3 \n\t"
+ " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
+
+ " test $1, %2 \n\t" /* check for odd samples */
+ " je 2f \n\t"
+
+ " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %w4 \n\t" /* .. | p0 | */
+ " rorw $8, %w4 \n\t"
+ " movd %4, %%xmm1 \n\t"
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
+ " rorw $8, %w4 \n\t"
+ " movw %w4, (%0) \n\t"
+ " add $2, %0 \n\t"
+ MOD_ADD ($1, %5)
+
+ "2: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
+ " test $1, %2 \n\t"
+ " je 4f \n\t"
+
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
+ SWAP_16 (%%xmm1)
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ SWAP_16 (%%xmm0)
+ " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
+ " add $4, %0 \n\t"
+ MOD_ADD ($2, %5)
+
+ "4: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
+ " test $1, %2 \n\t"
+ " je 6f \n\t"
+
+ /* FIXME, we can do aligned access of the volume values if we can guarantee
+ * that the array is 16 bytes aligned, we probably have to do the odd values
+ * after this then. */
+ "5: \n\t" /* do samples in groups of 4 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ SWAP_16 (%%xmm1)
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ SWAP_16 (%%xmm0)
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " add $8, %0 \n\t"
+ MOD_ADD ($4, %5)
+
+ "6: \n\t"
+ " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 8f \n\t"
+
+ "7: \n\t" /* do samples in groups of 8 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
+ SWAP_16_2 (%%xmm1, %%xmm3)
+ VOLUME_32x16 (%%xmm1, %%xmm0)
+ VOLUME_32x16 (%%xmm3, %%xmm2)
+ SWAP_16_2 (%%xmm0, %%xmm2)
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
+ " add $16, %0 \n\t"
+ MOD_ADD ($8, %5)
+ " dec %2 \n\t"
+ " jne 7b \n\t"
+ "8: \n\t"
+
+ : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp)
+ : "r" ((pa_reg_x86)channels)
+ : "cc"
+ );
}
#undef RUN_TEST
@@ -251,64 +251,64 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
#define PADDING 16
static void run_test (void) {
- int16_t samples[SAMPLES];
- int16_t samples_ref[SAMPLES];
- int16_t samples_orig[SAMPLES];
- int32_t volumes[CHANNELS + PADDING];
- int i, j, padding;
- pa_do_volume_func_t func;
- struct timeval start, stop;
-
- func = pa_get_volume_func (PA_SAMPLE_S16NE);
-
- printf ("checking SSE %zd\n", sizeof (samples));
-
- pa_random (samples, sizeof (samples));
- memcpy (samples_ref, samples, sizeof (samples));
- memcpy (samples_orig, samples, sizeof (samples));
-
- for (i = 0; i < CHANNELS; i++)
- volumes[i] = rand() >> 1;
- for (padding = 0; padding < PADDING; padding++, i++)
- volumes[i] = volumes[padding];
-
- func (samples_ref, volumes, CHANNELS, sizeof (samples));
- pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
- for (i = 0; i < SAMPLES; i++) {
- if (samples[i] != samples_ref[i]) {
- printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
- samples_orig[i], volumes[i % CHANNELS]);
- }
- }
+ int16_t samples[SAMPLES];
+ int16_t samples_ref[SAMPLES];
+ int16_t samples_orig[SAMPLES];
+ int32_t volumes[CHANNELS + PADDING];
+ int i, j, padding;
+ pa_do_volume_func_t func;
+ struct timeval start, stop;
- pa_gettimeofday(&start);
- for (j = 0; j < TIMES; j++) {
- memcpy (samples, samples_orig, sizeof (samples));
- pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
- }
- pa_gettimeofday(&stop);
- pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+ printf ("checking SSE %zd\n", sizeof (samples));
+
+ pa_random (samples, sizeof (samples));
+ memcpy (samples_ref, samples, sizeof (samples));
+ memcpy (samples_orig, samples, sizeof (samples));
+
+ for (i = 0; i < CHANNELS; i++)
+ volumes[i] = rand() >> 1;
+ for (padding = 0; padding < PADDING; padding++, i++)
+ volumes[i] = volumes[padding];
- pa_gettimeofday(&start);
- for (j = 0; j < TIMES; j++) {
- memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
- }
- pa_gettimeofday(&stop);
- pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
+ for (i = 0; i < SAMPLES; i++) {
+ if (samples[i] != samples_ref[i]) {
+ printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ samples_orig[i], volumes[i % CHANNELS]);
+ }
+ }
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples, samples_orig, sizeof (samples));
+ pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+
+ pa_gettimeofday(&start);
+ for (j = 0; j < TIMES; j++) {
+ memcpy (samples_ref, samples_orig, sizeof (samples));
+ func (samples_ref, volumes, CHANNELS, sizeof (samples));
+ }
+ pa_gettimeofday(&stop);
+ pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
}
#endif
#endif /* defined (__i386__) || defined (__amd64__) */
void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
#if defined (__i386__) || defined (__amd64__)
- pa_log_info("Initialising SSE optimized functions.");
+ pa_log_info("Initialising SSE optimized functions.");
#ifdef RUN_TEST
- run_test ();
+ run_test ();
#endif
- pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
- pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
+ pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse);
+ pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse);
#endif /* defined (__i386__) || defined (__amd64__) */
}
commit f8ffe0dabcedf56437c00feb895d7d7229971ba0
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 12:30:48 2009 +0200
svolume: cleanups
Use PA_MAX
Use pa_rtclock_now() for benchmarks
diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c
index 0d39d10..5bd1448 100644
--- a/src/pulsecore/svolume_arm.c
+++ b/src/pulsecore/svolume_arm.c
@@ -47,7 +47,7 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi
{
int32_t *ve;
- channels = MAX (4, channels);
+ channels = PA_MAX (4U, channels);
ve = volumes + channels;
__asm__ __volatile__ (
@@ -137,7 +137,7 @@ static void run_test (void) {
int32_t volumes[CHANNELS + PADDING];
int i, j, padding;
pa_do_volume_func_t func;
- struct timeval start, stop;
+ pa_usec_t start, stop;
func = pa_get_volume_func (PA_SAMPLE_S16NE);
@@ -161,21 +161,21 @@ static void run_test (void) {
}
}
- pa_gettimeofday(&start);
+ start = pa_rtclock_now();
for (j = 0; j < TIMES; j++) {
memcpy (samples, samples_orig, sizeof (samples));
pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
}
- pa_gettimeofday(&stop);
- pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ stop = pa_rtclock_now();
+ pa_log_info("ARM: %llu usec.", (long long unsigned int) (stop - start));
- pa_gettimeofday(&start);
+ start = pa_rtclock_now();
for (j = 0; j < TIMES; j++) {
memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
}
- pa_gettimeofday(&stop);
- pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ stop = pa_rtclock_now();
+ pa_log_info("ref: %llu usec.", (long long unsigned int) (stop - start));
}
#endif
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
index 7e24268..8510b0c 100644
--- a/src/pulsecore/svolume_mmx.c
+++ b/src/pulsecore/svolume_mmx.c
@@ -100,7 +100,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
- channels = MAX (4, channels);
+ channels = PA_MAX (4U, channels);
__asm__ __volatile__ (
" xor %3, %3 \n\t"
@@ -170,7 +170,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
- channels = MAX (4, channels);
+ channels = PA_MAX (4U, channels);
__asm__ __volatile__ (
" xor %3, %3 \n\t"
@@ -254,7 +254,7 @@ static void run_test (void) {
int32_t volumes[CHANNELS + PADDING];
int i, j, padding;
pa_do_volume_func_t func;
- struct timeval start, stop;
+ pa_usec_t start, stop;
func = pa_get_volume_func (PA_SAMPLE_S16NE);
@@ -278,21 +278,21 @@ static void run_test (void) {
}
}
- pa_gettimeofday(&start);
+ start = pa_rtclock_now();
for (j = 0; j < TIMES; j++) {
memcpy (samples, samples_orig, sizeof (samples));
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
}
- pa_gettimeofday(&stop);
- pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ stop = pa_rtclock_now();
+ pa_log_info("MMX: %llu usec.", (long long unsigned int)(stop - start));
- pa_gettimeofday(&start);
+ start = pa_rtclock_now();
for (j = 0; j < TIMES; j++) {
memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
}
- pa_gettimeofday(&stop);
- pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ stop = pa_rtclock_now();
+ pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}
#endif
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index b5e3687..54af4a5 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -81,7 +81,7 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
- channels = MAX (8, channels);
+ channels = PA_MAX (8U, channels);
__asm__ __volatile__ (
" xor %3, %3 \n\t"
@@ -161,7 +161,7 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
/* the max number of samples we process at a time, this is also the max amount
* we overread the volume array, which should have enough padding. */
- channels = MAX (8, channels);
+ channels = PA_MAX (8U, channels);
__asm__ __volatile__ (
" xor %3, %3 \n\t"
@@ -257,7 +257,7 @@ static void run_test (void) {
int32_t volumes[CHANNELS + PADDING];
int i, j, padding;
pa_do_volume_func_t func;
- struct timeval start, stop;
+ pa_usec_t start, stop;
func = pa_get_volume_func (PA_SAMPLE_S16NE);
@@ -281,21 +281,21 @@ static void run_test (void) {
}
}
- pa_gettimeofday(&start);
+ start = pa_rtclock_now();
for (j = 0; j < TIMES; j++) {
memcpy (samples, samples_orig, sizeof (samples));
pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
}
- pa_gettimeofday(&stop);
- pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ stop = pa_rtclock_now();
+ pa_log_info("SSE: %llu usec.", (long long unsigned int)(stop - start));
- pa_gettimeofday(&start);
+ start = pa_rtclock_now();
for (j = 0; j < TIMES; j++) {
memcpy (samples_ref, samples_orig, sizeof (samples));
func (samples_ref, volumes, CHANNELS, sizeof (samples));
}
- pa_gettimeofday(&stop);
- pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start));
+ stop = pa_rtclock_now();
+ pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}
#endif
#endif /* defined (__i386__) || defined (__amd64__) */
commit c1b6a87b27b569cda135da05b53cc98aa9ca37cb
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 13:40:27 2009 +0200
alsa-sink: reduce the amount of smoother updates
Exponentially increase the time between smoother updates. We start with a
2ms interval and double it after each update, up to a maximum of 200ms.
Smoother updates and the resulting linear regression take a fair amount of CPU,
so we want to reduce the number of updates.
diff --git a/src/modules/alsa/alsa-sink.c b/src/modules/alsa/alsa-sink.c
index e3707ae..c369472 100644
--- a/src/modules/alsa/alsa-sink.c
+++ b/src/modules/alsa/alsa-sink.c
@@ -68,6 +68,9 @@
#define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms -- Sleep at least 10ms on each iteration */
#define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC) /* 4ms -- Wakeup at least this long before the buffer runs empty*/
+#define SMOOTHER_MIN_INTERVAL (2*PA_USEC_PER_MSEC) /* 2ms -- min smoother update interval */
+#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC) /* 200ms -- max smoother update interval */
+
#define VOLUME_ACCURACY (PA_VOLUME_NORM/100) /* don't require volume adjustments to be perfectly correct. don't necessarily extend granularity in software unless the differences get greater than this level */
struct userdata {
@@ -115,6 +118,8 @@ struct userdata {
pa_smoother *smoother;
uint64_t write_count;
uint64_t since_start;
+ pa_usec_t smoother_interval;
+ pa_usec_t last_smoother_update;
pa_reserve_wrapper *reserve;
pa_hook_slot *reserve_slot;
@@ -723,17 +728,27 @@ static void update_smoother(struct userdata *u) {
now1 = pa_timespec_load(&htstamp);
}
+ /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */
+ if (now1 <= 0)
+ now1 = pa_rtclock_now();
+
+ /* check if the time since the last update is bigger than the interval */
+ if (u->last_smoother_update > 0) {
+ if (u->last_smoother_update + u->smoother_interval > now1)
+ return;
+ }
+
position = (int64_t) u->write_count - ((int64_t) delay * (int64_t) u->frame_size);
if (PA_UNLIKELY(position < 0))
position = 0;
- /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */
- if (now1 <= 0)
- now1 = pa_rtclock_now();
-
now2 = pa_bytes_to_usec((uint64_t) position, &u->sink->sample_spec);
+ u->last_smoother_update = now1;
+ /* exponentially increase the update interval up to the MAX limit */
+ u->smoother_interval = PA_MIN (u->smoother_interval * 2, SMOOTHER_MAX_INTERVAL);
+
pa_smoother_put(u->smoother, now1, now2);
}
@@ -906,6 +921,8 @@ static int unsuspend(struct userdata *u) {
u->write_count = 0;
pa_smoother_reset(u->smoother, pa_rtclock_now(), TRUE);
+ u->smoother_interval = SMOOTHER_MIN_INTERVAL;
+ u->last_smoother_update = 0;
u->first = TRUE;
u->since_start = 0;
@@ -1622,6 +1639,7 @@ pa_sink *pa_alsa_sink_new(pa_module *m, pa_modargs *ma, const char*driver, pa_ca
5,
pa_rtclock_now(),
TRUE);
+ u->smoother_interval = SMOOTHER_MIN_INTERVAL;
dev_id = pa_modargs_get_value(
ma, "device_id",
commit 05fef5f551ac7f295d2f2cb74642cb359be1b12d
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 15:50:02 2009 +0200
sconv: allow for setting custom functions
Add methods to override the default conversion functions.
diff --git a/src/pulsecore/sconv.c b/src/pulsecore/sconv.c
index 937bf5d..d06d698 100644
--- a/src/pulsecore/sconv.c
+++ b/src/pulsecore/sconv.c
@@ -184,98 +184,130 @@ static void alaw_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) {
*b = st_13linear2alaw(*a >> 3);
}
+static pa_convert_func_t to_float32ne_table[] = {
+ [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_float32ne,
+ [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_float32ne,
+ [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_to_float32ne,
+ [PA_SAMPLE_S16LE] = (pa_convert_func_t) pa_sconv_s16le_to_float32ne,
+ [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_to_float32ne,
+ [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_float32ne,
+ [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_float32ne,
+ [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_float32ne,
+ [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_float32ne,
+ [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne,
+ [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne,
+ [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
+ [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
+};
+
pa_convert_func_t pa_get_convert_to_float32ne_function(pa_sample_format_t f) {
- static const pa_convert_func_t table[] = {
- [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_float32ne,
- [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_float32ne,
- [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_to_float32ne,
- [PA_SAMPLE_S16LE] = (pa_convert_func_t) pa_sconv_s16le_to_float32ne,
- [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_to_float32ne,
- [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_float32ne,
- [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_float32ne,
- [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_float32ne,
- [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_float32ne,
- [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne,
- [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne,
- [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
- [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
- };
+ pa_assert(f >= 0);
+ pa_assert(f < PA_SAMPLE_MAX);
+
+ return to_float32ne_table[f];
+}
+
+void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) {
pa_assert(f >= 0);
pa_assert(f < PA_SAMPLE_MAX);
- return table[f];
+ to_float32ne_table[f] = func;
}
+static pa_convert_func_t from_float32ne_table[] = {
+ [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_float32ne,
+ [PA_SAMPLE_S16LE] = (pa_convert_func_t) pa_sconv_s16le_from_float32ne,
+ [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_from_float32ne,
+ [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_float32ne,
+ [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_float32ne,
+ [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_float32ne,
+ [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_float32ne,
+ [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne,
+ [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne,
+ [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
+ [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
+ [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_float32ne,
+ [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_float32ne
+};
+
pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) {
- static const pa_convert_func_t table[] = {
- [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_float32ne,
- [PA_SAMPLE_S16LE] = (pa_convert_func_t) pa_sconv_s16le_from_float32ne,
- [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_from_float32ne,
- [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_float32ne,
- [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_float32ne,
- [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_float32ne,
- [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_float32ne,
- [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne,
- [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne,
- [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
- [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
- [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_float32ne,
- [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_float32ne
- };
+ pa_assert(f >= 0);
+ pa_assert(f < PA_SAMPLE_MAX);
+
+ return from_float32ne_table[f];
+}
+
+void pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) {
pa_assert(f >= 0);
pa_assert(f < PA_SAMPLE_MAX);
- return table[f];
+ from_float32ne_table[f] = func;
}
+static pa_convert_func_t to_s16ne_table[] = {
+ [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_s16ne,
+ [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne,
+ [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne,
+ [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_to_s16ne,
+ [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne,
+ [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_s16ne,
+ [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_s16ne,
+ [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_s16ne,
+ [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_s16ne,
+ [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne,
+ [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne,
+ [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_s16ne,
+ [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_to_s16ne
+};
+
pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) {
- static const pa_convert_func_t table[] = {
- [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_s16ne,
- [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne,
- [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne,
- [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_to_s16ne,
- [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne,
- [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_s16ne,
- [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_s16ne,
- [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_s16ne,
- [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_s16ne,
- [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne,
- [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne,
- [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_s16ne,
- [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_to_s16ne
- };
+ pa_assert(f >= 0);
+ pa_assert(f < PA_SAMPLE_MAX);
+
+ return to_s16ne_table[f];
+}
+
+void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) {
pa_assert(f >= 0);
pa_assert(f < PA_SAMPLE_MAX);
- return table[f];
+ to_s16ne_table[f] = func;
}
+static pa_convert_func_t from_s16ne_table[] = {
+ [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_s16ne,
+ [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne,
+ [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne,
+ [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne,
+ [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne,
+ [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_s16ne,
+ [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_s16ne,
+ [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_s16ne,
+ [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_s16ne,
+ [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne,
+ [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne,
+ [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_s16ne,
+ [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_s16ne,
+};
+
pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) {
- static const pa_convert_func_t table[] = {
- [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_s16ne,
- [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne,
- [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne,
- [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne,
- [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne,
- [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_s16ne,
- [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_s16ne,
- [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_s16ne,
- [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_s16ne,
- [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne,
- [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne,
- [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_s16ne,
- [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_s16ne,
- };
+ pa_assert(f >= 0);
+ pa_assert(f < PA_SAMPLE_MAX);
+
+ return from_s16ne_table[f];
+}
+
+void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) {
pa_assert(f >= 0);
pa_assert(f < PA_SAMPLE_MAX);
- return table[f];
+ from_s16ne_table[f] = func;
}
diff --git a/src/pulsecore/sconv.h b/src/pulsecore/sconv.h
index b00a16a..cd93755 100644
--- a/src/pulsecore/sconv.h
+++ b/src/pulsecore/sconv.h
@@ -33,4 +33,10 @@ pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) P
pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) PA_GCC_PURE;
pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) PA_GCC_PURE;
+void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func);
+void pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func);
+
+void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func);
+void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func);
+
#endif
commit a3f4a4f6ba741a996442d7a80cc3e267fab705fb
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 17:54:45 2009 +0200
resamples; refactor the channel remapping bits
Move the channel remapping bits into a separate structure. We'll make this
structure global so that optimized versions can use it to perform the channel
remapping.
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 5a6c398..0d8ca01 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -44,10 +44,20 @@
/* Number of samples of extra space we allow the resamplers to return */
#define EXTRA_FRAMES 128
-typedef void (*pa_do_remap_func_t) (pa_resampler *r, void *d, const void *s, unsigned n);
+typedef struct pa_remap pa_remap_t;
-static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n);
-static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n);
+typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n);
+
+struct pa_remap {
+ pa_sample_format_t *format;
+ pa_sample_spec *i_ss, *o_ss;
+ float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ pa_do_remap_func_t do_remap;
+};
+
+static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n);
+static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n);
struct pa_resampler {
pa_resample_method_t method;
@@ -66,10 +76,8 @@ struct pa_resampler {
pa_convert_func_t to_work_format_func;
pa_convert_func_t from_work_format_func;
- float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
- int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ pa_remap_t remap;
pa_bool_t map_required;
- pa_do_remap_func_t do_remap;
void (*impl_free)(pa_resampler *r);
void (*impl_update_rates)(pa_resampler *r);
@@ -218,6 +226,11 @@ pa_resampler* pa_resampler_new(
r->i_ss = *a;
r->o_ss = *b;
+ /* set up the remap structure */
+ r->remap.i_ss = &r->i_ss;
+ r->remap.o_ss = &r->o_ss;
+ r->remap.format = &r->work_format;
+
if (am)
r->i_cm = *am;
else if (!pa_channel_map_init_auto(&r->i_cm, r->i_ss.channels, PA_CHANNEL_MAP_DEFAULT))
@@ -584,33 +597,41 @@ static int front_rear_side(pa_channel_position_t p) {
static void calc_map_table(pa_resampler *r) {
unsigned oc, ic;
+ unsigned n_oc, n_ic;
pa_bool_t ic_connected[PA_CHANNELS_MAX];
pa_bool_t remix;
pa_strbuf *s;
char *t;
+ pa_remap_t *m;
pa_assert(r);
if (!(r->map_required = (r->i_ss.channels != r->o_ss.channels || (!(r->flags & PA_RESAMPLER_NO_REMAP) && !pa_channel_map_equal(&r->i_cm, &r->o_cm)))))
return;
- memset(r->map_table_f, 0, sizeof(r->map_table_f));
- memset(r->map_table_i, 0, sizeof(r->map_table_i));
+ m = &r->remap;
+
+ n_oc = r->o_ss.channels;
+ n_ic = r->i_ss.channels;
+
+ memset(m->map_table_f, 0, sizeof(m->map_table_f));
+ memset(m->map_table_i, 0, sizeof(m->map_table_i));
+
memset(ic_connected, 0, sizeof(ic_connected));
remix = (r->flags & (PA_RESAMPLER_NO_REMAP|PA_RESAMPLER_NO_REMIX)) == 0;
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
pa_bool_t oc_connected = FALSE;
pa_channel_position_t b = r->o_cm.map[oc];
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
pa_channel_position_t a = r->i_cm.map[ic];
if (r->flags & PA_RESAMPLER_NO_REMAP) {
/* We shall not do any remapping. Hence, just check by index */
if (ic == oc)
- r->map_table_f[oc][ic] = 1.0;
+ m->map_table_f[oc][ic] = 1.0;
continue;
}
@@ -619,7 +640,7 @@ static void calc_map_table(pa_resampler *r) {
/* We shall not do any remixing. Hence, just check by name */
if (a == b)
- r->map_table_f[oc][ic] = 1.0;
+ m->map_table_f[oc][ic] = 1.0;
continue;
}
@@ -694,7 +715,7 @@ static void calc_map_table(pa_resampler *r) {
*/
if (a == b || a == PA_CHANNEL_POSITION_MONO || b == PA_CHANNEL_POSITION_MONO) {
- r->map_table_f[oc][ic] = 1.0;
+ m->map_table_f[oc][ic] = 1.0;
oc_connected = TRUE;
ic_connected[ic] = TRUE;
@@ -712,14 +733,14 @@ static void calc_map_table(pa_resampler *r) {
/* We are not connected and on the left side, let's
* average all left side input channels. */
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_left(r->i_cm.map[ic]))
n++;
if (n > 0)
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_left(r->i_cm.map[ic])) {
- r->map_table_f[oc][ic] = 1.0f / (float) n;
+ m->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
@@ -733,14 +754,14 @@ static void calc_map_table(pa_resampler *r) {
/* We are not connected and on the right side, let's
* average all right side input channels. */
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_right(r->i_cm.map[ic]))
n++;
if (n > 0)
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_right(r->i_cm.map[ic])) {
- r->map_table_f[oc][ic] = 1.0f / (float) n;
+ m->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
@@ -754,14 +775,14 @@ static void calc_map_table(pa_resampler *r) {
/* We are not connected and at the center. Let's
* average all center input channels. */
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_center(r->i_cm.map[ic]))
n++;
if (n > 0) {
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_center(r->i_cm.map[ic])) {
- r->map_table_f[oc][ic] = 1.0f / (float) n;
+ m->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
} else {
@@ -771,14 +792,14 @@ static void calc_map_table(pa_resampler *r) {
n = 0;
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic]))
n++;
if (n > 0)
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic])) {
- r->map_table_f[oc][ic] = 1.0f / (float) n;
+ m->map_table_f[oc][ic] = 1.0f / (float) n;
ic_connected[ic] = TRUE;
}
@@ -792,12 +813,12 @@ static void calc_map_table(pa_resampler *r) {
/* We are not connected and an LFE. Let's average all
* channels for LFE. */
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
if (!(r->flags & PA_RESAMPLER_NO_LFE))
- r->map_table_f[oc][ic] = 1.0f / (float) r->i_ss.channels;
+ m->map_table_f[oc][ic] = 1.0f / (float) n_ic;
else
- r->map_table_f[oc][ic] = 0;
+ m->map_table_f[oc][ic] = 0;
/* Please note that a channel connected to LFE
* doesn't really count as connected. */
@@ -813,7 +834,7 @@ static void calc_map_table(pa_resampler *r) {
ic_unconnected_center = 0,
ic_unconnected_lfe = 0;
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
pa_channel_position_t a = r->i_cm.map[ic];
if (ic_connected[ic])
@@ -836,20 +857,20 @@ static void calc_map_table(pa_resampler *r) {
* the left side by .9 and add in our averaged unconnected
* channels multiplied by .1 */
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
if (!on_left(r->o_cm.map[oc]))
continue;
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
if (ic_connected[ic]) {
- r->map_table_f[oc][ic] *= .9f;
+ m->map_table_f[oc][ic] *= .9f;
continue;
}
if (on_left(r->i_cm.map[ic]))
- r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left;
+ m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left;
}
}
}
@@ -861,20 +882,20 @@ static void calc_map_table(pa_resampler *r) {
* the right side by .9 and add in our averaged unconnected
* channels multiplied by .1 */
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
if (!on_right(r->o_cm.map[oc]))
continue;
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
if (ic_connected[ic]) {
- r->map_table_f[oc][ic] *= .9f;
+ m->map_table_f[oc][ic] *= .9f;
continue;
}
if (on_right(r->i_cm.map[ic]))
- r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right;
+ m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right;
}
}
}
@@ -887,20 +908,20 @@ static void calc_map_table(pa_resampler *r) {
* the center side by .9 and add in our averaged unconnected
* channels multiplied by .1 */
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
if (!on_center(r->o_cm.map[oc]))
continue;
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
if (ic_connected[ic]) {
- r->map_table_f[oc][ic] *= .9f;
+ m->map_table_f[oc][ic] *= .9f;
continue;
}
if (on_center(r->i_cm.map[ic])) {
- r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center;
+ m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center;
mixed_in = TRUE;
}
}
@@ -918,7 +939,7 @@ static void calc_map_table(pa_resampler *r) {
it into left and right. Using .375 and 0.75 as
factors. */
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
if (ic_connected[ic])
continue;
@@ -926,7 +947,7 @@ static void calc_map_table(pa_resampler *r) {
if (!on_center(r->i_cm.map[ic]))
continue;
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))
continue;
@@ -937,7 +958,7 @@ static void calc_map_table(pa_resampler *r) {
}
}
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))
continue;
@@ -947,7 +968,7 @@ static void calc_map_table(pa_resampler *r) {
}
}
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))
continue;
@@ -955,10 +976,10 @@ static void calc_map_table(pa_resampler *r) {
if (ncenter[oc] <= 0)
continue;
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
if (ic_connected[ic]) {
- r->map_table_f[oc][ic] *= .75f;
+ m->map_table_f[oc][ic] *= .75f;
continue;
}
@@ -966,7 +987,7 @@ static void calc_map_table(pa_resampler *r) {
continue;
if (!found_frs[ic] || front_rear_side(r->i_cm.map[ic]) == front_rear_side(r->o_cm.map[oc]))
- r->map_table_f[oc][ic] = .375f / (float) ncenter[oc];
+ m->map_table_f[oc][ic] = .375f / (float) ncenter[oc];
}
}
}
@@ -977,37 +998,37 @@ static void calc_map_table(pa_resampler *r) {
/* OK, so there is an unconnected LFE channel. Let's mix
* it into all channels, with factor 0.375 */
- for (ic = 0; ic < r->i_ss.channels; ic++) {
+ for (ic = 0; ic < n_ic; ic++) {
if (!on_lfe(r->i_cm.map[ic]))
continue;
- for (oc = 0; oc < r->o_ss.channels; oc++)
- r->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe;
+ for (oc = 0; oc < n_oc; oc++)
+ m->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe;
}
}
}
/* make a 16:16 int version of the matrix */
- for (oc = 0; oc < r->o_ss.channels; oc++)
- for (ic = 0; ic < r->i_ss.channels; ic++)
- r->map_table_i[oc][ic] = (int32_t) (r->map_table_f[oc][ic] * 0x10000);
+ for (oc = 0; oc < n_oc; oc++)
+ for (ic = 0; ic < n_ic; ic++)
+ m->map_table_i[oc][ic] = (int32_t) (m->map_table_f[oc][ic] * 0x10000);
s = pa_strbuf_new();
pa_strbuf_printf(s, " ");
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
pa_strbuf_printf(s, " I%02u ", ic);
pa_strbuf_puts(s, "\n +");
- for (ic = 0; ic < r->i_ss.channels; ic++)
+ for (ic = 0; ic < n_ic; ic++)
pa_strbuf_printf(s, "------");
pa_strbuf_puts(s, "\n");
- for (oc = 0; oc < r->o_ss.channels; oc++) {
+ for (oc = 0; oc < n_oc; oc++) {
pa_strbuf_printf(s, "O%02u |", oc);
- for (ic = 0; ic < r->i_ss.channels; ic++)
- pa_strbuf_printf(s, " %1.3f", r->map_table_f[oc][ic]);
+ for (ic = 0; ic < n_ic; ic++)
+ pa_strbuf_printf(s, " %1.3f", m->map_table_f[oc][ic]);
pa_strbuf_puts(s, "\n");
}
@@ -1016,13 +1037,13 @@ static void calc_map_table(pa_resampler *r) {
pa_xfree(t);
/* find some common channel remappings, fall back to full matrix operation. */
- if (r->i_ss.channels == 1 && r->o_ss.channels == 2 &&
- r->map_table_f[0][0] >= 1.0 && r->map_table_f[1][0] >= 1.0) {
- r->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;;
- pa_log_debug("Using mono to stereo remapping");
+ if (n_ic == 1 && n_oc == 2 &&
+ m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) {
+ m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;;
+ pa_log_info("Using mono to stereo remapping");
} else {
- r->do_remap = (pa_do_remap_func_t) remap_channels_matrix;
- pa_log_debug("Using generic matrix remapping");
+ m->do_remap = (pa_do_remap_func_t) remap_channels_matrix;
+ pa_log_info("Using generic matrix remapping");
}
}
@@ -1064,10 +1085,10 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
return &r->buf1;
}
-static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) {
+static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n) {
unsigned i;
- switch (r->work_format) {
+ switch (*m->format) {
case PA_SAMPLE_FLOAT32NE:
{
float *d, *s;
@@ -1117,27 +1138,26 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un
}
}
-static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n) {
- unsigned oc, i;
+static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+ unsigned oc, ic, i;
unsigned n_ic, n_oc;
- n_ic = r->i_ss.channels;
- n_oc = r->o_ss.channels;
+ n_ic = m->i_ss->channels;
+ n_oc = m->o_ss->channels;
- memset(dst, 0, r->buf2.length);
-
- switch (r->work_format) {
+ switch (*m->format) {
case PA_SAMPLE_FLOAT32NE:
{
float *d, *s;
+ memset(dst, 0, n * sizeof (float) * n_oc);
+
for (oc = 0; oc < n_oc; oc++) {
- unsigned ic;
for (ic = 0; ic < n_ic; ic++) {
float vol;
- vol = r->map_table_f[oc][ic];
+ vol = m->map_table_f[oc][ic];
if (vol <= 0.0)
continue;
@@ -1161,13 +1181,14 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
{
int16_t *d, *s;
+ memset(dst, 0, n * sizeof (int16_t) * n_oc);
+
for (oc = 0; oc < n_oc; oc++) {
- unsigned ic;
for (ic = 0; ic < n_ic; ic++) {
int32_t vol;
- vol = r->map_table_i[oc][ic];
+ vol = m->map_table_i[oc][ic];
if (vol <= 0)
continue;
@@ -1181,7 +1202,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
} else {
for (i = n; i > 0; i--, s += n_ic, d += n_oc)
*d += (int16_t) (((int32_t)*s * vol) >> 16);
- }
+ }
}
}
break;
@@ -1194,6 +1215,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src,
static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
unsigned in_n_samples, out_n_samples, n_frames;
void *src, *dst;
+ pa_remap_t *remap;
pa_assert(r);
pa_assert(input);
@@ -1222,8 +1244,10 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
src = ((uint8_t*) pa_memblock_acquire(input->memblock) + input->index);
dst = pa_memblock_acquire(r->buf2.memblock);
- pa_assert (r->do_remap);
- r->do_remap (r, dst, src, n_frames);
+ remap = &r->remap;
+
+ pa_assert (remap->do_remap);
+ remap->do_remap (remap, dst, src, n_frames);
pa_memblock_release(input->memblock);
pa_memblock_release(r->buf2.memblock);
commit ac1f2e0a2e0707636aabd48baa57c124a877f834
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 18:23:42 2009 +0200
remap: move remapping code in separate file
Move the remapping code into a separate file. Have functions to install custom
init functions that can install optimized versions, when they want.
diff --git a/src/Makefile.am b/src/Makefile.am
index eca68b1..b818c3e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -822,6 +822,7 @@ libpulsecore_ at PA_MAJORMINORMICRO@_la_SOURCES = \
pulsecore/object.c pulsecore/object.h \
pulsecore/play-memblockq.c pulsecore/play-memblockq.h \
pulsecore/play-memchunk.c pulsecore/play-memchunk.h \
+ pulsecore/remap.c pulsecore/remap.h \
pulsecore/resampler.c pulsecore/resampler.h \
pulsecore/rtpoll.c pulsecore/rtpoll.h \
pulsecore/sample-util.c pulsecore/sample-util.h \
diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c
new file mode 100644
index 0000000..2e93afc
--- /dev/null
+++ b/src/pulsecore/remap.c
@@ -0,0 +1,197 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk.com>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include <pulse/sample.h>
+#include <pulsecore/log.h>
+#include <pulsecore/macro.h>
+
+#include "remap.h"
+
+static void remap_mono_to_stereo_c (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+ unsigned i;
+
+ switch (*m->format) {
+ case PA_SAMPLE_FLOAT32NE:
+ {
+ float *d, *s;
+
+ d = (float *) dst;
+ s = (float *) src;
+
+ for (i = n >> 2; i; i--) {
+ d[0] = d[1] = s[0];
+ d[2] = d[3] = s[1];
+ d[4] = d[5] = s[2];
+ d[6] = d[7] = s[3];
+ s += 4;
+ d += 8;
+ }
+ for (i = n & 3; i; i--) {
+ d[0] = d[1] = s[0];
+ s++;
+ d += 2;
+ }
+ break;
+ }
+ case PA_SAMPLE_S16NE:
+ {
+ int16_t *d, *s;
+
+ d = (int16_t *) dst;
+ s = (int16_t *) src;
+
+ for (i = n >> 2; i; i--) {
+ d[0] = d[1] = s[0];
+ d[2] = d[3] = s[1];
+ d[4] = d[5] = s[2];
+ d[6] = d[7] = s[3];
+ s += 4;
+ d += 8;
+ }
+ for (i = n & 3; i; i--) {
+ d[0] = d[1] = s[0];
+ s++;
+ d += 2;
+ }
+ break;
+ }
+ default:
+ pa_assert_not_reached();
+ }
+}
+
+static void remap_channels_matrix_c (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+ unsigned oc, ic, i;
+ unsigned n_ic, n_oc;
+
+ n_ic = m->i_ss->channels;
+ n_oc = m->o_ss->channels;
+
+ switch (*m->format) {
+ case PA_SAMPLE_FLOAT32NE:
+ {
+ float *d, *s;
+
+ memset(dst, 0, n * sizeof (float) * n_oc);
+
+ for (oc = 0; oc < n_oc; oc++) {
+
+ for (ic = 0; ic < n_ic; ic++) {
+ float vol;
+
+ vol = m->map_table_f[oc][ic];
+
+ if (vol <= 0.0)
+ continue;
+
+ d = (float *)dst + oc;
+ s = (float *)src + ic;
+
+ if (vol >= 1.0) {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s;
+ } else {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s * vol;
+ }
+ }
+ }
+
+ break;
+ }
+ case PA_SAMPLE_S16NE:
+ {
+ int16_t *d, *s;
+
+ memset(dst, 0, n * sizeof (int16_t) * n_oc);
+
+ for (oc = 0; oc < n_oc; oc++) {
+
+ for (ic = 0; ic < n_ic; ic++) {
+ int32_t vol;
+
+ vol = m->map_table_i[oc][ic];
+
+ if (vol <= 0)
+ continue;
+
+ d = (int16_t *)dst + oc;
+ s = (int16_t *)src + ic;
+
+ if (vol >= 0x10000) {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s;
+ } else {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += (int16_t) (((int32_t)*s * vol) >> 16);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ pa_assert_not_reached();
+ }
+}
+
+/* set the function that will execute the remapping based on the matrices */
+static void init_remap_c (pa_remap_t *m) {
+ unsigned n_oc, n_ic;
+
+ n_oc = m->o_ss->channels;
+ n_ic = m->i_ss->channels;
+
+ /* find some common channel remappings, fall back to full matrix operation. */
+ if (n_ic == 1 && n_oc == 2 &&
+ m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) {
+ m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_c;
+ pa_log_info("Using mono to stereo remapping");
+ } else {
+ m->do_remap = (pa_do_remap_func_t) remap_channels_matrix_c;
+ pa_log_info("Using generic matrix remapping");
+ }
+}
+
+
+/* default C implementation */
+static pa_init_remap_func_t remap_func = init_remap_c;
+
+void pa_init_remap (pa_remap_t *m) {
+ pa_assert (remap_func);
+
+ /* just call the installed remap init function */
+ remap_func (m);
+}
+
+pa_init_remap_func_t pa_get_init_remap_func(void) {
+ return remap_func;
+}
+
+void pa_set_init_remap_func(pa_init_remap_func_t func) {
+ remap_func = func;
+}
diff --git a/src/pulsecore/remap.h b/src/pulsecore/remap.h
new file mode 100644
index 0000000..32a67cd
--- /dev/null
+++ b/src/pulsecore/remap.h
@@ -0,0 +1,48 @@
+#ifndef fooremapfoo
+#define fooremapfoo
+
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk.com>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#include <pulse/sample.h>
+
+typedef struct pa_remap pa_remap_t;
+
+typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n);
+
+struct pa_remap {
+ pa_sample_format_t *format;
+ pa_sample_spec *i_ss, *o_ss;
+ float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ pa_do_remap_func_t do_remap;
+};
+
+void pa_init_remap (pa_remap_t *m);
+
+/* custom installation of init functions */
+typedef void (*pa_init_remap_func_t) (pa_remap_t *m);
+
+pa_init_remap_func_t pa_get_init_remap_func(void);
+void pa_set_init_remap_func(pa_init_remap_func_t func);
+
+#endif /* fooremapfoo */
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 0d8ca01..f1bfa15 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -40,25 +40,11 @@
#include "ffmpeg/avcodec.h"
#include "resampler.h"
+#include "remap.h"
/* Number of samples of extra space we allow the resamplers to return */
#define EXTRA_FRAMES 128
-typedef struct pa_remap pa_remap_t;
-
-typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n);
-
-struct pa_remap {
- pa_sample_format_t *format;
- pa_sample_spec *i_ss, *o_ss;
- float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
- int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
- pa_do_remap_func_t do_remap;
-};
-
-static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n);
-static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n);
-
struct pa_resampler {
pa_resample_method_t method;
pa_resample_flags_t flags;
@@ -1036,16 +1022,8 @@ static void calc_map_table(pa_resampler *r) {
pa_log_debug("Channel matrix:\n%s", t = pa_strbuf_tostring_free(s));
pa_xfree(t);
- /* find some common channel remappings, fall back to full matrix operation. */
- if (n_ic == 1 && n_oc == 2 &&
- m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) {
- m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;;
- pa_log_info("Using mono to stereo remapping");
- } else {
- m->do_remap = (pa_do_remap_func_t) remap_channels_matrix;
- pa_log_info("Using generic matrix remapping");
- }
-
+ /* initialize the remapping function */
+ pa_init_remap (m);
}
static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) {
@@ -1085,133 +1063,6 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
return &r->buf1;
}
-static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n) {
- unsigned i;
-
- switch (*m->format) {
- case PA_SAMPLE_FLOAT32NE:
- {
- float *d, *s;
-
- d = (float *) dst;
- s = (float *) src;
-
- for (i = n >> 2; i; i--) {
- d[0] = d[1] = s[0];
- d[2] = d[3] = s[1];
- d[4] = d[5] = s[2];
- d[6] = d[7] = s[3];
- s += 4;
- d += 8;
- }
- for (i = n & 3; i; i--) {
- d[0] = d[1] = s[0];
- s++;
- d += 2;
- }
- break;
- }
- case PA_SAMPLE_S16NE:
- {
- int16_t *d, *s;
-
- d = (int16_t *) dst;
- s = (int16_t *) src;
-
- for (i = n >> 2; i; i--) {
- d[0] = d[1] = s[0];
- d[2] = d[3] = s[1];
- d[4] = d[5] = s[2];
- d[6] = d[7] = s[3];
- s += 4;
- d += 8;
- }
- for (i = n & 3; i; i--) {
- d[0] = d[1] = s[0];
- s++;
- d += 2;
- }
- break;
- }
- default:
- pa_assert_not_reached();
- }
-}
-
-static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n) {
- unsigned oc, ic, i;
- unsigned n_ic, n_oc;
-
- n_ic = m->i_ss->channels;
- n_oc = m->o_ss->channels;
-
- switch (*m->format) {
- case PA_SAMPLE_FLOAT32NE:
- {
- float *d, *s;
-
- memset(dst, 0, n * sizeof (float) * n_oc);
-
- for (oc = 0; oc < n_oc; oc++) {
-
- for (ic = 0; ic < n_ic; ic++) {
- float vol;
-
- vol = m->map_table_f[oc][ic];
-
- if (vol <= 0.0)
- continue;
-
- d = (float *)dst + oc;
- s = (float *)src + ic;
-
- if (vol >= 1.0) {
- for (i = n; i > 0; i--, s += n_ic, d += n_oc)
- *d += *s;
- } else {
- for (i = n; i > 0; i--, s += n_ic, d += n_oc)
- *d += *s * vol;
- }
- }
- }
-
- break;
- }
- case PA_SAMPLE_S16NE:
- {
- int16_t *d, *s;
-
- memset(dst, 0, n * sizeof (int16_t) * n_oc);
-
- for (oc = 0; oc < n_oc; oc++) {
-
- for (ic = 0; ic < n_ic; ic++) {
- int32_t vol;
-
- vol = m->map_table_i[oc][ic];
-
- if (vol <= 0)
- continue;
-
- d = (int16_t *)dst + oc;
- s = (int16_t *)src + ic;
-
- if (vol >= 0x10000) {
- for (i = n; i > 0; i--, s += n_ic, d += n_oc)
- *d += *s;
- } else {
- for (i = n; i > 0; i--, s += n_ic, d += n_oc)
- *d += (int16_t) (((int32_t)*s * vol) >> 16);
- }
- }
- }
- break;
- }
- default:
- pa_assert_not_reached();
- }
-}
-
static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
unsigned in_n_samples, out_n_samples, n_frames;
void *src, *dst;
commit 28baa53d55fa51d5fbbb1be54db3581fc3d151dd
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 18:29:02 2009 +0200
remap: allow specialisations to install NULL
Fall back to the default C implementation when the remap init function did not
set a function.
diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c
index 2e93afc..108df90 100644
--- a/src/pulsecore/remap.c
+++ b/src/pulsecore/remap.c
@@ -186,6 +186,11 @@ void pa_init_remap (pa_remap_t *m) {
/* just call the installed remap init functions */
remap_func (m);
+
+ if (m->do_remap == NULL) {
+ /* nothing was installed, fallback to C versions */
+ init_remap_c (m);
+ }
}
pa_init_remap_func_t pa_get_init_remap_func(void) {
commit e961efc130481ff4c5a053eb03dd3ec4d513c615
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 18:32:51 2009 +0200
remap: init the do_remap function to NULL
diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c
index 108df90..a0fc85b 100644
--- a/src/pulsecore/remap.c
+++ b/src/pulsecore/remap.c
@@ -184,11 +184,13 @@ static pa_init_remap_func_t remap_func = init_remap_c;
void pa_init_remap (pa_remap_t *m) {
pa_assert (remap_func);
- /* just call the installed remap init functions */
+ m->do_remap = NULL;
+
+ /* call the installed remap init function */
remap_func (m);
if (m->do_remap == NULL) {
- /* nothing was installed, fallback to C versions */
+ /* nothing was installed, fallback to C version */
init_remap_c (m);
}
}
commit 6e5dbed51ee508759ed8b5adabc998ba8faf4774
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 19:46:06 2009 +0200
remap: add MMX mono to stereo
diff --git a/src/Makefile.am b/src/Makefile.am
index b818c3e..ab91be8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -823,6 +823,7 @@ libpulsecore_ at PA_MAJORMINORMICRO@_la_SOURCES = \
pulsecore/play-memblockq.c pulsecore/play-memblockq.h \
pulsecore/play-memchunk.c pulsecore/play-memchunk.h \
pulsecore/remap.c pulsecore/remap.h \
+ pulsecore/remap_mmx.c \
pulsecore/resampler.c pulsecore/resampler.h \
pulsecore/rtpoll.c pulsecore/rtpoll.h \
pulsecore/sample-util.c pulsecore/sample-util.h \
diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c
index 0457199..bc093ec 100644
--- a/src/pulsecore/cpu-x86.c
+++ b/src/pulsecore/cpu-x86.c
@@ -110,8 +110,10 @@ void pa_cpu_init_x86 (void) {
(flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
/* activate various optimisations */
- if (flags & PA_CPU_X86_MMX)
+ if (flags & PA_CPU_X86_MMX) {
pa_volume_func_init_mmx (flags);
+ pa_remap_func_init_mmx (flags);
+ }
if (flags & PA_CPU_X86_SSE)
pa_volume_func_init_sse (flags);
diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h
index 07e630e..b11ef6e 100644
--- a/src/pulsecore/cpu-x86.h
+++ b/src/pulsecore/cpu-x86.h
@@ -63,4 +63,6 @@ typedef int64_t pa_reg_x86;
void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags);
void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags);
+void pa_remap_func_init_mmx(pa_cpu_x86_flag_t flags);
+
#endif /* foocpux86hfoo */
diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c
new file mode 100644
index 0000000..6690cfa
--- /dev/null
+++ b/src/pulsecore/remap_mmx.c
@@ -0,0 +1,174 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk.com>
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include <pulse/sample.h>
+#include <pulsecore/log.h>
+#include <pulsecore/macro.h>
+
+#include "cpu-x86.h"
+#include "remap.h"
+
+#define LOAD_SAMPLES \
+ " movq (%1), %%mm0 \n\t" \
+ " movq 8(%1), %%mm2 \n\t" \
+ " movq 16(%1), %%mm4 \n\t" \
+ " movq 24(%1), %%mm6 \n\t" \
+ " movq %%mm0, %%mm1 \n\t" \
+ " movq %%mm2, %%mm3 \n\t" \
+ " movq %%mm4, %%mm5 \n\t" \
+ " movq %%mm6, %%mm7 \n\t"
+
+#define UNPACK_SAMPLES(s) \
+ " punpckl"#s" %%mm0, %%mm0 \n\t" \
+ " punpckh"#s" %%mm1, %%mm1 \n\t" \
+ " punpckl"#s" %%mm2, %%mm2 \n\t" \
+ " punpckh"#s" %%mm3, %%mm3 \n\t" \
+ " punpckl"#s" %%mm4, %%mm4 \n\t" \
+ " punpckh"#s" %%mm5, %%mm5 \n\t" \
+ " punpckl"#s" %%mm6, %%mm6 \n\t" \
+ " punpckh"#s" %%mm7, %%mm7 \n\t" \
+
+#define STORE_SAMPLES \
+ " movq %%mm0, (%0) \n\t" \
+ " movq %%mm1, 8(%0) \n\t" \
+ " movq %%mm2, 16(%0) \n\t" \
+ " movq %%mm3, 24(%0) \n\t" \
+ " movq %%mm4, 32(%0) \n\t" \
+ " movq %%mm5, 40(%0) \n\t" \
+ " movq %%mm6, 48(%0) \n\t" \
+ " movq %%mm7, 56(%0) \n\t" \
+ " add $32, %1 \n\t" \
+ " add $64, %0 \n\t"
+
+#define HANDLE_SINGLE(s) \
+ " movd (%1), %%mm0 \n\t" \
+ " movq %%mm0, %%mm1 \n\t" \
+ " punpckl"#s" %%mm0, %%mm0 \n\t" \
+ " movq %%mm0, (%0) \n\t" \
+ " add $4, %1 \n\t" \
+ " add $8, %0 \n\t"
+
+static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+ pa_reg_x86 temp;
+
+ switch (*m->format) {
+ case PA_SAMPLE_FLOAT32NE:
+ {
+ __asm__ __volatile__ (
+ " mov %3, %2 \n\t"
+ " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 2f \n\t"
+
+ "1: \n\t" /* do samples in groups of 8 */
+ LOAD_SAMPLES
+ UNPACK_SAMPLES(dq)
+ STORE_SAMPLES
+ " dec %2 \n\t"
+ " jne 1b \n\t"
+
+ "2: \n\t"
+ " mov %3, %2 \n\t"
+ " and $7, %2 \n\t" /* prepare for processing the remaining samples */
+ " je 4f \n\t"
+
+ "3: \n\t"
+ HANDLE_SINGLE(dq)
+ " dec %2 \n\t"
+ " jne 3b \n\t"
+
+ "4: \n\t"
+ " emms \n\t"
+
+ : "+r" (dst), "+r" (src), "=&r" (temp)
+ : "r" ((pa_reg_x86)n)
+ : "cc"
+ );
+ break;
+ }
+ case PA_SAMPLE_S16NE:
+ {
+ __asm__ __volatile__ (
+ " mov %3, %2 \n\t"
+ " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */
+ " cmp $0, %2 \n\t"
+ " je 2f \n\t"
+
+ "1: \n\t" /* do samples in groups of 16 */
+ LOAD_SAMPLES
+ UNPACK_SAMPLES(wd)
+ STORE_SAMPLES
+ " dec %2 \n\t"
+ " jne 1b \n\t"
+
+ "2: \n\t"
+ " mov %3, %2 \n\t"
+ " and $7, %2 \n\t" /* prepare for processing the remaining samples */
+ " je 4f \n\t"
+
+ "3: \n\t"
+ HANDLE_SINGLE(wd)
+ " dec %2 \n\t"
+ " jne 3b \n\t"
+
+ "4: \n\t"
+ " emms \n\t"
+
+ : "+r" (dst), "+r" (src), "=&r" (temp)
+ : "r" ((pa_reg_x86)n)
+ : "cc"
+ );
+ break;
+ }
+ default:
+ pa_assert_not_reached();
+ }
+}
+
+/* Install the MMX remapper in m->do_remap when the channel matrix is a plain mono-to-stereo copy. */
+static void init_remap_mmx (pa_remap_t *m) {
+ unsigned n_oc, n_ic;
+
+ n_oc = m->o_ss->channels;
+ n_ic = m->i_ss->channels;
+
+ /* Only mono -> stereo at full gain is handled here; in every other case do_remap is left untouched so that pa_init_remap() can fall back to the C version. */
+ if (n_ic == 1 && n_oc == 2 &&
+ m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) {
+ m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_mmx;
+ pa_log_info("Using MMX mono to stereo remapping");
+ }
+}
+
+void pa_remap_func_init_mmx (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
+ pa_log_info("Initialising MMX optimized remappers.");
+
+ pa_set_init_remap_func ((pa_init_remap_func_t) init_remap_mmx);
+#endif /* defined (__i386__) || defined (__amd64__) */
+}
commit 6076cef2092391d8b46aa84f86857cffebce4583
Author: Wim Taymans <wim.taymans at collabora.co.uk>
Date: Thu Aug 20 20:00:50 2009 +0200
remap: make the MMX code prettier
diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c
index 6690cfa..bfcae6c 100644
--- a/src/pulsecore/remap_mmx.c
+++ b/src/pulsecore/remap_mmx.c
@@ -73,6 +73,28 @@
" add $4, %1 \n\t" \
" add $8, %0 \n\t"
+#define MONO_TO_STEREO(s) /* asm body: %0 = dst, %1 = src, %2 = scratch counter, %3 = n; s is the punpck suffix (dq or wd) */ \
+ " mov %3, %2 \n\t" \
+ " sar $3, %2 \n\t" /* block count = n / 8. NOTE(review): each block pass moves 32 source bytes, i.e. 8 samples only for 4-byte samples; with s = wd (S16) that is 16 samples per pass - verify n is not overrun */ \
+ " cmp $0, %2 \n\t" \
+ " je 2f \n\t" /* no full block: skip to the leftovers */ \
+ "1: \n\t" /* block loop: 32 source bytes in, 64 bytes out per pass */ \
+ LOAD_SAMPLES \
+ UNPACK_SAMPLES(s) \
+ STORE_SAMPLES \
+ " dec %2 \n\t" \
+ " jne 1b \n\t" \
+ "2: \n\t" \
+ " mov %3, %2 \n\t" \
+ " and $7, %2 \n\t" /* leftover count = n % 8 */ \
+ " je 4f \n\t" \
+ "3: \n\t" /* leftover loop: 4 source bytes in, 8 bytes out per pass (NOTE(review): two samples per pass for S16) */ \
+ HANDLE_SINGLE(s) \
+ " dec %2 \n\t" \
+ " jne 3b \n\t" \
+ "4: \n\t" \
+ " emms \n\t" /* clear MMX state before returning to C code */
+
static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, unsigned n) {
pa_reg_x86 temp;
@@ -80,31 +102,7 @@ static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src,
case PA_SAMPLE_FLOAT32NE:
{
__asm__ __volatile__ (
- " mov %3, %2 \n\t"
- " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */
- " cmp $0, %2 \n\t"
- " je 2f \n\t"
-
- "1: \n\t" /* do samples in groups of 8 */
- LOAD_SAMPLES
- UNPACK_SAMPLES(dq)
- STORE_SAMPLES
- " dec %2 \n\t"
- " jne 1b \n\t"
-
- "2: \n\t"
- " mov %3, %2 \n\t"
- " and $7, %2 \n\t" /* prepare for processing the remaining samples */
- " je 4f \n\t"
-
- "3: \n\t"
- HANDLE_SINGLE(dq)
- " dec %2 \n\t"
- " jne 3b \n\t"
-
- "4: \n\t"
- " emms \n\t"
-
+ MONO_TO_STEREO(dq) /* do doubles to quads */
: "+r" (dst), "+r" (src), "=&r" (temp)
: "r" ((pa_reg_x86)n)
: "cc"
@@ -114,31 +112,7 @@ static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src,
case PA_SAMPLE_S16NE:
{
__asm__ __volatile__ (
- " mov %3, %2 \n\t"
- " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */
- " cmp $0, %2 \n\t"
- " je 2f \n\t"
-
- "1: \n\t" /* do samples in groups of 16 */
- LOAD_SAMPLES
- UNPACK_SAMPLES(wd)
- STORE_SAMPLES
- " dec %2 \n\t"
- " jne 1b \n\t"
-
- "2: \n\t"
- " mov %3, %2 \n\t"
- " and $7, %2 \n\t" /* prepare for processing the remaining samples */
- " je 4f \n\t"
-
- "3: \n\t"
- HANDLE_SINGLE(wd)
- " dec %2 \n\t"
- " jne 3b \n\t"
-
- "4: \n\t"
- " emms \n\t"
-
+ MONO_TO_STEREO(wd) /* do words to doubles */
: "+r" (dst), "+r" (src), "=&r" (temp)
: "r" ((pa_reg_x86)n)
: "cc"
commit ab5ac06ac76c3afbbd99bce2840329dd74756a73
Merge: d6fb8d1 6076cef
Author: Lennart Poettering <lennart at poettering.net>
Date: Sun Aug 23 00:06:35 2009 +0200
Merge commit 'wtay/optimize'
commit a0f01ddc951694e1d13f44dc3a5d0d3fb2daa142
Author: Lennart Poettering <lennart at poettering.net>
Date: Sun Aug 23 21:49:37 2009 +0200
port a few things over to use xmalloc and friends instead of low-level libc malloc/free directly
diff --git a/src/pulsecore/core-util.c b/src/pulsecore/core-util.c
index 0eb32cc..1c8c678 100644
--- a/src/pulsecore/core-util.c
+++ b/src/pulsecore/core-util.c
@@ -2223,7 +2223,7 @@ int pa_close_all(int except_fd, ...) {
va_end(ap);
r = pa_close_allv(p);
- free(p);
+ pa_xfree(p);
return r;
}
diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c
index 5a994b7..453b784 100644
--- a/src/pulsecore/cpu-arm.c
+++ b/src/pulsecore/cpu-arm.c
@@ -2,7 +2,7 @@
This file is part of PulseAudio.
Copyright 2004-2006 Lennart Poettering
- Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
+ Copyright 2009 Wim Taymans <wim.taymans at collabora.co.uk>
PulseAudio is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
@@ -60,21 +60,20 @@ static char *get_cpuinfo(void) {
char *cpuinfo;
int n, fd;
- if (!(cpuinfo = malloc(MAX_BUFFER)))
- return NULL;
+ cpuinfo = pa_xmalloc(MAX_BUFFER);
if ((fd = open("/proc/cpuinfo", O_RDONLY)) < 0) {
- free (cpuinfo);
+ pa_xfree(cpuinfo);
return NULL;
}
- if ((n = read(fd, cpuinfo, MAX_BUFFER-1)) < 0) {
- free (cpuinfo);
- close (fd);
+ if ((n = pa_read(fd, cpuinfo, MAX_BUFFER-1)) < 0) {
+ pa_xfree(cpuinfo);
+ pa_close(fd);
return NULL;
}
cpuinfo[n] = 0;
- close (fd);
+ pa_close(fd);
return cpuinfo;
}
@@ -102,7 +101,7 @@ void pa_cpu_init_arm (void) {
if (arch >= 7)
flags |= PA_CPU_ARM_V7;
- free (line);
+ pa_xfree(line);
}
/* get the CPU features */
if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
@@ -118,10 +117,10 @@ void pa_cpu_init_arm (void) {
else if (!strcmp (current, "vfpv3"))
flags |= PA_CPU_ARM_VFPV3;
- free (current);
+ pa_xfree(current);
}
}
- free (cpuinfo);
+ pa_xfree(cpuinfo);
pa_log_info ("CPU flags: %s%s%s%s%s%s",
(flags & PA_CPU_ARM_V6) ? "V6 " : "",
--
hooks/post-receive
PulseAudio Sound Server
More information about the pulseaudio-commits
mailing list