[pulseaudio-discuss] [RFC PATCH] alsa-sink: Reduce hardware pointer update syscalls
Peter Meerwald
pmeerw at pmeerw.net
Wed Oct 1 08:11:32 PDT 2014
Hello,
> Calling snd_pcm_avail/delay causes a syscall to the kernel, which
> communicates with the audio hardware, and can therefore be expensive
> on some cards.
here is my benchmarking, I'm not super-confident in the results
similar to Alexander, I am seeing an assertion immediately:
pulseaudio[2071]: Assertion 'frames > 0' failed at modules/alsa/alsa-sink.c:651, function mmap_write(). Aborting.
not loading the suspend-on-idle module didn't help, instead I patched
- if (!after_avail && frames == 0)
+ if (frames == 0)
break;
immediately before the assert(); not sure if this is correct
snd_pcm_mmap_begin() which is called by pa_alsa_safe_mmap_begin()
explicitly states that frames may be set to 0 if no buffer is full -- so I
think 0 should NOT lead to an assertion
I don't understand the meaning of after_avail; in
if (!after_avail && (int) sframes == -EAGAIN)
down a few lines, after_avail is always false!?
anyway, above patch might have fixed the issue and I was able to do some
measurements on a beagleboard (ARMv7, OMAP3, TPS65950 PMIC audio),
Linux kernel 3.7 and PA shortly before the srbchannel patches...
I am using htop and look at the CPU% for the alsa-sink thread while doing
paplay -v -v --latency-msec=20 audio_at_32KHz.wav (hardware is doing 32KHz
as well, so no resampling)
--latency-msec influences the measurement I lot, I think lower values are
more interesting, using 20msec to stress the CPU a bit
without the patch, I get about 17% CPU
with the patch, I get about 15.8% CPU
using perf top -p <pid of alsa-sink thread> when playing back for 10sec or
so:
with patch:
3.22% [kernel] [k] snd_pcm_status
3.17% [kernel] [k] __do_softirq
2.10% [kernel] [k] do_sys_poll
2.02% [kernel] [k] finish_task_switch.constprop.59
1.79% libc-2.15.so [.] 0x000d2aac
1.76% libprotocol-native.so [.] 0x0000b040
1.74% libalsa-util.so [.] thread_func
1.73% [kernel] [k] fget_light
1.64% [kernel] [k] eventfd_write
1.51% [kernel] [k] clocksource_mmio_readl_up
1.50% libpulsecore-5.0.so [.] 0x0000e0b0
1.50% [kernel] [k] eventfd_poll
1.47% libpthread-2.15.so [.] 0x0000e7e0
1.35% libpulsecommon-5.0.so [.] 0x0000e5c0
1.31% libpulsecommon-5.0.so [.] stack_pop
1.31% libpulsecommon-5.0.so [.] pa_memblock_unref
1.27% [kernel] [k] __wake_up_sync_key
1.24% [kernel] [k] vector_swi
1.12% libc-2.15.so [.] memcpy
1.02% ld-2.15.so [.] __tls_get_addr
0.96% libpulsecore-5.0.so [.] pa_sink_input_peek
0.91% libpulsecommon-5.0.so [.] stack_push
0.90% libpulse.so.0.17.3 [.] 0x00008bd0
0.88% libalsa-util.so [.] 0x0000990c
0.83% libpulsecore-5.0.so [.] pa_rtpoll_run
without patch:
3.95% [kernel] [k] snd_pcm_hwsync
3.06% [kernel] [k] __do_softirq
1.97% [kernel] [k] finish_task_switch.constprop.59
1.86% [kernel] [k] do_sys_poll
1.80% libprotocol-native.so [.] 0x0000870c
1.77% libpulsecore-5.0.so [.] 0x0000e428
1.76% libc-2.15.so [.] 0x0006c728
1.74% [kernel] [k] fget_light
1.42% [kernel] [k] clocksource_mmio_readl_up
1.36% libpthread-2.15.so [.] 0x00004980
1.35% libpulsecommon-5.0.so [.] pa_memblock_unref
1.32% [kernel] [k] vector_swi
1.31% [kernel] [k] eventfd_write
1.19% libalsa-util.so [.] thread_func
1.14% [kernel] [k] eventfd_poll
1.14% libpulsecommon-5.0.so [.] stack_pop
1.09% libpulsecommon-5.0.so [.] stack_push
1.09% libc-2.15.so [.] memcpy
0.97% libpulsecommon-5.0.so [.] 0x0000de14
0.95% ld-2.15.so [.] __tls_get_addr
0.91% libpulse.so.0.17.3 [.] 0x00008bd0
0.83% [kernel] [k] __wake_up_sync_key
0.80% ld-2.15.so [.] 0x00012780
0.80% libpulsecommon-5.0.so [.] do_pstream_read_write
0.80% libpulsecore-5.0.so [.] pa_rtpoll_run
0.74% libpulsecore-5.0.so [.] pa_sink_input_peek
0.74% libalsa-util.so [.] 0x0000a5a0
0.73% [kernel] [k] snd_pcm_status
we can see that snd_pcm_hwsync() is gone with the patch, but
snd_pcm_status() is still prominent
the patch MIGHT safe a little bit of CPU
an observation: the latency as reported by paplay fluctuates more with the
patch than without, but no hard numbers (yet)
regards, p.
> diff --git a/src/modules/alsa/alsa-sink.c b/src/modules/alsa/alsa-sink.c
> index 9e9b863..5330c78 100644
> --- a/src/modules/alsa/alsa-sink.c
> +++ b/src/modules/alsa/alsa-sink.c
> @@ -155,6 +155,11 @@ struct userdata {
> pa_reserve_monitor_wrapper *monitor;
> pa_hook_slot *monitor_slot;
>
> + snd_pcm_status_t *pcm_status;
> + snd_pcm_sframes_t pcm_status_delay;
> + snd_pcm_sframes_t pcm_status_avail;
> + bool pcm_status_valid;
> +
> /* ucm context */
> pa_alsa_ucm_mapping_context *ucm_context;
> };
> @@ -440,6 +445,7 @@ static int try_recover(struct userdata *u, const char *call, int err) {
> if (err == -ESTRPIPE)
> pa_log_debug("%s: System suspended!", call);
>
> + u->pcm_status_valid = false;
> if ((err = snd_pcm_recover(u->pcm_handle, err, 1)) < 0) {
> pa_log("%s: %s", call, pa_alsa_strerror(err));
> return -1;
> @@ -450,6 +456,29 @@ static int try_recover(struct userdata *u, const char *call, int err) {
> return 0;
> }
>
> +static bool ensure_pcm_status(struct userdata *u) {
> + int err;
> +
> + if (u->pcm_status_valid)
> + return true;
> +#ifdef DEBUG_TIMING
> + pa_log_debug("Calling snd_pcm_status (hwsync)");
> +#endif
> + if (PA_UNLIKELY((err = pa_alsa_safe_delay(u->pcm_handle, u->pcm_status, &u->pcm_status_delay, u->hwbuf_size, &u->sink->sample_spec, false)) < 0)) {
> + pa_log_warn("Failed to retrieve current pcm status: %s", pa_alsa_strerror(err));
> + if (try_recover(u, "snd_pcm_status", err) < 0)
> + return false;
> + if ((err = pa_alsa_safe_delay(u->pcm_handle, u->pcm_status, &u->pcm_status_delay, u->hwbuf_size, &u->sink->sample_spec, false)) < 0) {
> + pa_log_warn("Failed to re-retrieve current pcm status: %s", pa_alsa_strerror(err));
> + return false;
> + }
> + }
> +
> + u->pcm_status_avail = snd_pcm_status_get_avail(u->pcm_status); /* FIXME: This should be checked for weird values, just like delay */
> + u->pcm_status_valid = true;
> + return true;
> +}
> +
> static size_t check_left_to_play(struct userdata *u, size_t n_bytes, bool on_timeout) {
> size_t left_to_play;
> bool underrun = false;
> @@ -521,23 +550,16 @@ static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
> hw_sleep_time(u, &max_sleep_usec, &process_usec);
>
> for (;;) {
> - snd_pcm_sframes_t n;
> size_t n_bytes;
> int r;
> bool after_avail = true;
>
> /* First we determine how many samples are missing to fill the
> * buffer up to 100% */
> + if (PA_UNLIKELY(!ensure_pcm_status(u)))
> + return -1;
>
> - if (PA_UNLIKELY((n = pa_alsa_safe_avail(u->pcm_handle, u->hwbuf_size, &u->sink->sample_spec)) < 0)) {
> -
> - if ((r = try_recover(u, "snd_pcm_avail", (int) n)) == 0)
> - continue;
> -
> - return r;
> - }
> -
> - n_bytes = (size_t) n * u->frame_size;
> + n_bytes = (size_t) u->pcm_status_avail * u->frame_size;
>
> #ifdef DEBUG_TIMING
> pa_log_debug("avail: %lu", (unsigned long) n_bytes);
> @@ -609,6 +631,7 @@ static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
>
> if (PA_UNLIKELY((err = pa_alsa_safe_mmap_begin(u->pcm_handle, &areas, &offset, &frames, u->hwbuf_size, &u->sink->sample_spec)) < 0)) {
>
> + u->pcm_status_valid = false;
> if (!after_avail && err == -EAGAIN)
> break;
>
> @@ -647,6 +670,7 @@ static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
> pa_memblock_unref_fixed(chunk.memblock);
>
> if (PA_UNLIKELY((sframes = snd_pcm_mmap_commit(u->pcm_handle, offset, frames)) < 0)) {
> + u->pcm_status_valid = false;
>
> if (!after_avail && (int) sframes == -EAGAIN)
> break;
> @@ -661,6 +685,8 @@ static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
>
> u->write_count += written;
> u->since_start += written;
> + u->pcm_status_avail -= sframes;
> + u->pcm_status_delay += sframes;
>
> #ifdef DEBUG_TIMING
> pa_log_debug("Wrote %lu bytes (of possible %lu bytes)", (unsigned long) written, (unsigned long) n_bytes);
> @@ -706,20 +732,14 @@ static int unix_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
> hw_sleep_time(u, &max_sleep_usec, &process_usec);
>
> for (;;) {
> - snd_pcm_sframes_t n;
> size_t n_bytes;
> int r;
> bool after_avail = true;
>
> - if (PA_UNLIKELY((n = pa_alsa_safe_avail(u->pcm_handle, u->hwbuf_size, &u->sink->sample_spec)) < 0)) {
> + if (PA_UNLIKELY(!ensure_pcm_status(u)))
> + return -1;
>
> - if ((r = try_recover(u, "snd_pcm_avail", (int) n)) == 0)
> - continue;
> -
> - return r;
> - }
> -
> - n_bytes = (size_t) n * u->frame_size;
> + n_bytes = (size_t) u->pcm_status_avail * u->frame_size;
>
> #ifdef DEBUG_TIMING
> pa_log_debug("avail: %lu", (unsigned long) n_bytes);
> @@ -789,6 +809,7 @@ static int unix_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
>
> if (PA_UNLIKELY(frames < 0)) {
>
> + u->pcm_status_valid = false;
> if (!after_avail && (int) frames == -EAGAIN)
> break;
>
> @@ -815,6 +836,8 @@ static int unix_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
>
> work_done = true;
>
> + u->pcm_status_avail -= frames;
> + u->pcm_status_delay += frames;
> u->write_count += written;
> u->since_start += written;
>
> @@ -848,26 +871,19 @@ static int unix_write(struct userdata *u, pa_usec_t *sleep_usec, bool polled, bo
> }
>
> static void update_smoother(struct userdata *u) {
> - snd_pcm_sframes_t delay = 0;
> int64_t position;
> - int err;
> pa_usec_t now1 = 0, now2;
> - snd_pcm_status_t *status;
> snd_htimestamp_t htstamp = { 0, 0 };
>
> - snd_pcm_status_alloca(&status);
> -
> pa_assert(u);
> pa_assert(u->pcm_handle);
> -
> /* Let's update the time smoother */
> -
> - if (PA_UNLIKELY((err = pa_alsa_safe_delay(u->pcm_handle, status, &delay, u->hwbuf_size, &u->sink->sample_spec, false)) < 0)) {
> - pa_log_warn("Failed to query DSP status data: %s", pa_alsa_strerror(err));
> + if (!ensure_pcm_status(u)) {
> + pa_log_warn("Failed to query DSP status data");
> return;
> }
>
> - snd_pcm_status_get_htstamp(status, &htstamp);
> + snd_pcm_status_get_htstamp(u->pcm_status, &htstamp);
> now1 = pa_timespec_load(&htstamp);
>
> /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */
> @@ -879,7 +895,7 @@ static void update_smoother(struct userdata *u) {
> if (u->last_smoother_update + u->smoother_interval > now1)
> return;
>
> - position = (int64_t) u->write_count - ((int64_t) delay * (int64_t) u->frame_size);
> + position = (int64_t) u->write_count - ((int64_t) u->pcm_status_delay * (int64_t) u->frame_size);
>
> if (PA_UNLIKELY(position < 0))
> position = 0;
> @@ -935,6 +951,7 @@ static int suspend(struct userdata *u) {
>
> /* Let's suspend -- we don't call snd_pcm_drain() here since that might
> * take awfully long with our long buffer sizes today. */
> + u->pcm_status_valid = false;
> snd_pcm_close(u->pcm_handle);
> u->pcm_handle = NULL;
>
> @@ -1002,6 +1019,7 @@ static int update_sw_params(struct userdata *u) {
>
> pa_log_debug("setting avail_min=%lu", (unsigned long) avail_min);
>
> + u->pcm_status_valid = false;
> if ((err = pa_alsa_set_sw_params(u->pcm_handle, avail_min, !u->use_tsched)) < 0) {
> pa_log("Failed to set software parameters: %s", pa_alsa_strerror(err));
> return err;
> @@ -1073,6 +1091,7 @@ static int unsuspend(struct userdata *u) {
> pa_snprintf(device_name, len, "%s,AES0=6", u->device_name);
> }
>
> + u->pcm_status_valid = false;
> if ((err = snd_pcm_open(&u->pcm_handle, device_name ? device_name : u->device_name, SND_PCM_STREAM_PLAYBACK,
> SND_PCM_NONBLOCK|
> SND_PCM_NO_AUTO_RESAMPLE|
> @@ -1136,6 +1155,7 @@ static int unsuspend(struct userdata *u) {
>
> fail:
> if (u->pcm_handle) {
> + u->pcm_status_valid = false;
> snd_pcm_close(u->pcm_handle);
> u->pcm_handle = NULL;
> }
> @@ -1653,6 +1673,7 @@ static int process_rewind(struct userdata *u) {
>
> in_frames = (snd_pcm_sframes_t) (rewind_nbytes / u->frame_size);
> pa_log_debug("before: %lu", (unsigned long) in_frames);
> + u->pcm_status_valid = false;
> if ((out_frames = snd_pcm_rewind(u->pcm_handle, (snd_pcm_uframes_t) in_frames)) < 0) {
> pa_log("snd_pcm_rewind() failed: %s", pa_alsa_strerror((int) out_frames));
> if (try_recover(u, "process_rewind", out_frames) < 0)
> @@ -1727,6 +1748,7 @@ static void thread_func(void *userdata) {
>
> if (u->first) {
> pa_log_info("Starting playback.");
> + u->pcm_status_valid = false;
> snd_pcm_start(u->pcm_handle);
>
> pa_smoother_resume(u->smoother, pa_rtclock_now(), true);
> @@ -1811,6 +1833,7 @@ static void thread_func(void *userdata) {
> (double) (real_sleep - rtpoll_sleep) / PA_USEC_PER_MSEC,
> (double) (u->tsched_watermark_usec) / PA_USEC_PER_MSEC);
> }
> + u->pcm_status_valid = false;
>
> if (u->sink->flags & PA_SINK_DEFERRED_VOLUME)
> pa_sink_volume_change_apply(u->sink, NULL);
> @@ -1827,11 +1850,14 @@ static void thread_func(void *userdata) {
> pollfd = pa_rtpoll_item_get_pollfd(u->alsa_rtpoll_item, &n);
>
> if ((err = snd_pcm_poll_descriptors_revents(u->pcm_handle, pollfd, n, &revents)) < 0) {
> + u->pcm_status_valid = false;
> pa_log("snd_pcm_poll_descriptors_revents() failed: %s", pa_alsa_strerror(err));
> goto fail;
> }
>
> if (revents & ~POLLOUT) {
> + u->pcm_status_valid = false;
> +
> if (pa_alsa_recover_from_poll(u->pcm_handle, revents) < 0)
> goto fail;
>
> @@ -2141,6 +2167,9 @@ pa_sink *pa_alsa_sink_new(pa_module *m, pa_modargs *ma, const char*driver, pa_ca
> if (reserve_monitor_init(u, dev_id) < 0)
> goto fail;
>
> + if (snd_pcm_status_malloc(&u->pcm_status) < 0)
> + goto fail;
> +
> b = use_mmap;
> d = use_tsched;
>
> @@ -2465,6 +2494,9 @@ static void userdata_free(struct userdata *u) {
> snd_pcm_close(u->pcm_handle);
> }
>
> + if (u->pcm_status)
> + snd_pcm_status_free(u->pcm_status);
> +
> if (u->mixer_fdl)
> pa_alsa_fdlist_free(u->mixer_fdl);
>
>
--
Peter Meerwald
+43-664-2444418 (mobile)
More information about the pulseaudio-discuss
mailing list