[pulseaudio-discuss] [PATCH 07/13] loopback: Refactor latency initialization

Thu Nov 19 16:03:07 PST 2015

On Wed, 2015-02-25 at 19:43 +0100, Georg Chini wrote:
> as well as the way of reacting to sink input or source output moving.
> 
> The goal is to make sure that the initial latency of the system matches
> the configured one.
> 
> While at it, allow to set adjust_time to 0, which means "no adjustments
> at all".
> ---
>  src/modules/module-loopback.c | 329 +++++++++++++++++++++++++++++++++++-------
>  1 file changed, 275 insertions(+), 54 deletions(-)
> 
> diff --git a/src/modules/module-loopback.c b/src/modules/module-loopback.c
> index 79bd106..09f2f58 100644
> --- a/src/modules/module-loopback.c
> +++ b/src/modules/module-loopback.c
> @@ -59,7 +59,9 @@ PA_MODULE_USAGE(
>  
>  #define DEFAULT_LATENCY_MSEC 200
>  
> -#define MEMBLOCKQ_MAXLENGTH (1024*1024*16)
> +#define MEMBLOCKQ_MAXLENGTH (1024*1024*32)

Why is this change done? Whatever the reason is, shouldn't this be done
in a separate patch?

> +
> +#define DEFAULT_BUFFER_MARGIN_MSEC 20
>  
>  #define DEFAULT_ADJUST_TIME_USEC (10*PA_USEC_PER_SEC)
>  
> @@ -80,11 +82,21 @@ struct userdata {
>  
>      int64_t recv_counter;
>      int64_t send_counter;
> +    uint32_t sink_adjust_counter;
> +    uint32_t source_adjust_counter;
>  
> -    size_t skip;
>      pa_usec_t latency;
> +    pa_usec_t buffer_latency;
> +    pa_usec_t initial_buffer_latency;
> +    pa_usec_t configured_sink_latency;
> +    pa_usec_t configured_source_latency;

It would be nice to have comments about what each of these different
latency variables mean.

I'm trying to understand what buffer_latency is... I suppose it's used
to configure the buffering so that the sink, source and buffer_latency
together equal u->latency (i.e. the user-configured loopback latency).
It's 1/4 of u->latency by default, so I guess you try to configure the
sink and source latencies to be 3/8 of u->latency each? buffer_latency
can't be less than 1.667 ms, however. (Where does that number come
from?) 

Hmm... When configuring the sink and source latencies, you divide u-
>latency by 3, which is consistent with the old code, so maybe it was a
typo to divide u->latency by 4 when setting the default buffer_latency?
I'll assume it was a typo, and you meant all three latency components
to be one third of u->latency by default.

buffer_latency can't be less than 3/4 of the sink or source latency
(where does that 3/4 come from?), so if the sink or source latency
exceeds that limit, buffer_latency is raised accordingly. That's with
dynamic latency support. If the sink or source doesn't support dynamic
latency, buffer_latency is raised to default_fragment_size_msec + 20
ms. I don't think it makes sense to use default_fragment_size_msec.
That variable is not guaranteed to have any relation to the sink/source
behaviour. Something derived from max_request for sinks would probably
be appropriate. I'm not sure about sources, maybe the fixed_latency
variable could be used.

> +
> +    pa_usec_t source_latency_sum;
> +    pa_usec_t sink_latency_sum;
>      bool in_pop;
> +    bool pop_called;
> +    bool source_sink_changed;
>  
>      struct {
>          int64_t send_counter;
> @@ -189,13 +201,20 @@ static uint32_t rate_controller(
>  static void adjust_rates(struct userdata *u) {
>      size_t buffer;
>      uint32_t old_rate, base_rate, new_rate;
> -    pa_usec_t final_latency, current_buffer_latency, current_latency, corrected_latency;
> +    pa_usec_t final_latency, source_sink_latency, current_buffer_latency, current_latency, corrected_latency;
>      int32_t latency_difference;
>      pa_usec_t snapshot_delay;
>  
>      pa_assert(u);
>      pa_assert_ctl_context();
>  
> +    u->sink_adjust_counter +=1;
> +    u->source_adjust_counter +=1;
> +
> +    /* Latency sums */
> +    u->source_latency_sum += u->latency_snapshot.source_latency;
> +    u->sink_latency_sum += u->latency_snapshot.sink_latency;
> +
>      /* Rates and latencies*/
>      old_rate = u->sink_input->sample_spec.rate;
>      base_rate = u->source_output->sample_spec.rate;
> @@ -210,10 +229,12 @@ static void adjust_rates(struct userdata *u) {
>      snapshot_delay = u->latency_snapshot.source_timestamp - u->latency_snapshot.sink_timestamp;
>      current_latency = u->latency_snapshot.sink_latency + current_buffer_latency + base_rate * u->latency_snapshot.source_latency / old_rate - snapshot_delay;
>  
> -    final_latency = u->latency;
> -
>      /* Latency and latency difference at base rate */
>      corrected_latency = u->latency_snapshot.source_latency + (u->latency_snapshot.sink_latency + current_buffer_latency) * old_rate / base_rate - snapshot_delay;
> +
> +    source_sink_latency = u->sink_latency_sum / u->sink_adjust_counter +
> +                          u->source_latency_sum / u->source_adjust_counter;
> +    final_latency = PA_MAX(u->latency, source_sink_latency + u->buffer_latency);
>      latency_difference = (int32_t)(corrected_latency - final_latency);

If I understand correctly, here you fix the target latency to something
more sensible if the user-configured latency is impossible to reach.
Could this be put into a separate patch? This patch is pretty painful
to review, so further splitting would be welcome.

>  
>      pa_log_debug("Loopback overall latency is %0.2f ms + %0.2f ms + %0.2f ms = %0.2f ms (at the base rate: %0.2f ms, old estimate: %0.2f ms)",
> @@ -227,6 +248,8 @@ static void adjust_rates(struct userdata *u) {
>                  (double) latency_difference / PA_USEC_PER_MSEC,
>                  (int32_t)(old_rate - base_rate));
>  
> +    u->source_sink_changed = false;
> +
>      /* Calculate new rate */
>      new_rate = rate_controller(base_rate, u->adjust_time, latency_difference);
>  
> @@ -253,11 +276,14 @@ static void time_callback(pa_mainloop_api *a, pa_time_event *e, const struct tim
>      adjust_rates(u);
>  }
>  
> -/* Called from main context */
> +/* Called from main context
> + * When source or sink changes, give it a third of a second to settle down, then call adjust_rates for the first time */
>  static void enable_adjust_timer(struct userdata *u, bool enable) {
>      if (enable) {
> -        if (u->time_event || u->adjust_time <= 0)
> +        if (!u->adjust_time)
>              return;
> +        if (u->time_event)
> +            u->core->mainloop->time_free(u->time_event);
>  
>          u->time_event = pa_core_rttime_new(u->module->core, pa_rtclock_now() + 333 * PA_USEC_PER_MSEC, time_callback, u);
>      } else {
> @@ -277,29 +303,58 @@ static void update_adjust_timer(struct userdata *u) {
>          enable_adjust_timer(u, true);
>  }
>  
> +static pa_usec_t get_requested_latency(struct userdata *u) {
> +
> +    return PA_MAX(u->configured_sink_latency + u->buffer_latency, u->latency);
> +}
> +
> +/* Called from all contexts */
> +static void memblockq_adjust(struct userdata *u, int32_t offset, bool allow_push) {
> +    size_t memblock_bytes, requested_buffer_bytes;
> +    pa_usec_t requested_buffer_latency;
> +    size_t buffer_offset;
> +    pa_memchunk silence;
> +
> +    requested_buffer_latency = get_requested_latency(u);
> +    if (offset > 0)
> +       requested_buffer_latency = PA_CLIP_SUB(requested_buffer_latency, (pa_usec_t)offset);
> +    else
> +       requested_buffer_latency = requested_buffer_latency - offset;
> +
> +    requested_buffer_bytes = pa_usec_to_bytes(requested_buffer_latency, &u->sink_input->sample_spec);
> +    memblock_bytes = pa_memblockq_get_length(u->memblockq);
> +
> +    /* Drop audio from queue */
> +    if ((int32_t)(memblock_bytes - requested_buffer_bytes) > 0) {
> +       buffer_offset = memblock_bytes - requested_buffer_bytes;
> +       pa_log_info("Dropping %zd bytes from queue", buffer_offset);
> +       pa_memblockq_drop(u->memblockq, buffer_offset);
> +    }
> +    /* Add silence to queue, will never happen from IO-thread */
> +    else if ((int32_t)(memblock_bytes - requested_buffer_bytes) < 0 && allow_push) {
> +       requested_buffer_bytes = requested_buffer_bytes - memblock_bytes;
> +       pa_log_info("Adding %zd bytes of silence to queue", requested_buffer_bytes);
> +       pa_sink_input_get_silence(u->sink_input, &silence);
> +       while (requested_buffer_bytes >= silence.length) {
> +          pa_memblockq_push_align(u->memblockq, &silence);
> +          requested_buffer_bytes -= silence.length;
> +       }
> +       if (requested_buffer_bytes > 0) {
> +          silence.length = requested_buffer_bytes;
> +          pa_memblockq_push_align(u->memblockq, &silence);
> +       }
> +       pa_memblock_unref(silence.memblock);
> +    }
> +}
> +
>  /* Called from input thread context */
>  static void source_output_push_cb(pa_source_output *o, const pa_memchunk *chunk) {
>      struct userdata *u;
> -    pa_memchunk copy;
>  
>      pa_source_output_assert_ref(o);
>      pa_source_output_assert_io_context(o);
>      pa_assert_se(u = o->userdata);
>  
> -    if (u->skip >= chunk->length) {
> -        u->skip -= chunk->length;
> -        return;
> -    }
> -
> -    if (u->skip > 0) {
> -        copy = *chunk;
> -        copy.index += u->skip;
> -        copy.length -= u->skip;
> -        u->skip = 0;
> -
> -        chunk = ©
> -    }
> -
>      pa_asyncmsgq_post(u->asyncmsgq, PA_MSGOBJECT(u->sink_input), SINK_INPUT_MESSAGE_POST, NULL, 0, chunk, NULL);
>      u->send_counter += (int64_t) chunk->length;
>  }
> @@ -339,6 +394,33 @@ static int source_output_process_msg_cb(pa_msgobject *obj, int code, void *data,
>      return pa_source_output_process_msg(obj, code, data, offset, chunk);
>  }
>  
> +static void set_source_output_latency(struct userdata *u, pa_source *source) {
> +     pa_usec_t min_latency, max_latency, buffer_msec, latency;
> +
> +    /* Set lower limit of source latency to 2.333 ms */
> +    latency = PA_MAX(u->latency / 3, 2.333 * PA_USEC_PER_MSEC);

Where does that 2.333 ms come from? Defining a constant for the minimum
source latency would be good.

> +
> +    if(source->flags & PA_SOURCE_DYNAMIC_LATENCY) {
> +       pa_source_get_latency_range(source, &min_latency, &max_latency);
> +       if (min_latency > latency) {
> +          u->buffer_latency = PA_MAX(u->buffer_latency, (pa_usec_t)(min_latency * 0.75));
> +          pa_log_warn("Cannot set requested source latency, adjusting buffer to %0.2f ms", (double)u->buffer_latency / PA_USEC_PER_MSEC);
> +       }
> +       latency = PA_CLAMP(latency, min_latency, max_latency);
> +    }
> +    else {
> +       latency = pa_source_get_latency(source);
> +       if (latency == 0)
> +          latency = pa_source_get_fixed_latency(source);
> +       buffer_msec = u->core->default_fragment_size_msec + DEFAULT_BUFFER_MARGIN_MSEC;
> +       if (u->buffer_latency < buffer_msec * PA_USEC_PER_MSEC) {
> +          pa_log_warn("Fixed latency device, setting buffer latency to %zd.00 ms", buffer_msec);
> +          u->buffer_latency = buffer_msec * PA_USEC_PER_MSEC;
> +       }
> +    }
> +    u->configured_source_latency = pa_source_output_set_requested_latency(u->source_output, latency);
> +}
> +
>  /* Called from output thread context */
>  static void source_output_attach_cb(pa_source_output *o) {
>      struct userdata *u;
> @@ -365,24 +447,10 @@ static void source_output_detach_cb(pa_source_output *o) {
>          pa_rtpoll_item_free(u->rtpoll_item_write);
>          u->rtpoll_item_write = NULL;
>      }
> -}
> -
> -/* Called from output thread context */
> -static void source_output_state_change_cb(pa_source_output *o, pa_source_output_state_t state) {
> -    struct userdata *u;
> -
> -    pa_source_output_assert_ref(o);
> -    pa_source_output_assert_io_context(o);
> -    pa_assert_se(u = o->userdata);
> -
> -    if (PA_SOURCE_OUTPUT_IS_LINKED(state) && o->thread_info.state == PA_SOURCE_OUTPUT_INIT) {
> -
> -        u->skip = pa_usec_to_bytes(PA_CLIP_SUB(pa_source_get_latency_within_thread(o->source),
> -                                               u->latency),
> -                                   &o->sample_spec);
> -
> -        pa_log_info("Skipping %lu bytes", (unsigned long) u->skip);
> -    }
> +   u->source_sink_changed = true;
> +   u->source_latency_sum = 0;
> +   u->source_adjust_counter = 0;
> +   u->buffer_latency = u->initial_buffer_latency;
>  }
>  
>  /* Called from main thread */
> @@ -408,7 +476,12 @@ static bool source_output_may_move_to_cb(pa_source_output *o, pa_source *dest) {
>      if (!u->sink_input || !u->sink_input->sink)
>          return true;
>  
> -    return dest != u->sink_input->sink->monitor_source;
> +    /* We may still be adjusting, so reset rate to default before moving the source */
> +    if (dest != u->sink_input->sink->monitor_source) {
> +       pa_sink_input_set_rate(u->sink_input, u->source_output->sample_spec.rate);
> +       return true;
> +    }
> +    return false;
>  }
>  
>  /* Called from main thread */
> @@ -416,6 +489,7 @@ static void source_output_moving_cb(pa_source_output *o, pa_source *dest) {
>      pa_proplist *p;
>      const char *n;
>      struct userdata *u;
> +    pa_usec_t sink_latency;
>  
>      if (!dest)
>          return;
> @@ -433,6 +507,29 @@ static void source_output_moving_cb(pa_source_output *o, pa_source *dest) {
>      pa_sink_input_update_proplist(u->sink_input, PA_UPDATE_REPLACE, p);
>      pa_proplist_free(p);
>  
> +    /* Set latency and calculate necessary buffer length
> +     * In some profile switching situations the sink will be invalid here. If so,
> +     * skip the buffer adjustment, it will be done when the sink becomes valid */
> +    set_source_output_latency(u, dest);
> +

The comment confused me, because I first thought it referred only to
the set_source_output_latency() call. Adding an empty line between the
comment and the function call should help. The same goes for the sink
input side.

I'm going to sleep now. I can continue reviewing this patch tomorrow,
or I can do it when you send v2, if you split this into more manageable
pieces (I'd prefer the latter). Which one do you think is better?

-- 
Tanu