[pulseaudio-discuss] [PATCH DEBUG] pulse: Enable memfd for client palyback

Ahmed S. Darwish darwish.07 at gmail.com
Sun Sep 20 18:20:01 PDT 2015


Hi,

This is a debugging patch I'm sending to ask for help for a bug
I'm currently facing when enabling memfd for client playback audio.

In this patch, I simply transform the client playback mempool
(context->mempool) to be backed by memfd shared memory instead of
posix SHM one.

Using this setup, and when I run a gstreamer client along with a
client using the pa simple APIs, a certain misbehavior occurs in
our pstreams do_read() code.

For background, memfd blocks transfer over pstreams work now like
this:

- The pstream descriptor is filled with necessary fields like:
  # descriptor flags (PA_PSTREAM_DESCRIPTOR_FLAGS) implying that
  we're marshalling a memfd block through (PA_FLAG_MEMFD_FD)
  # descriptor payload length (PA_PSTREAM_DESCRIPTOR_LENGTH), which
  is the `memfd_info' array length

- The file-descriptor backing the memfd block is then sent using
  unix socket ancilary data (SCM_RIGHTS). Two fields are sent:
  nfd, number of fds to be sent (1), and the fd array itself (fds[0],
  containing memfd fd). All of this is done using diwic's quite nice
  fd-passing abstractions

When srbchannels were converted to use memfds, the above sequence
worked perfectly.

when doing so for the playback mempool, sometimes pstreams code
reach do_read() with a flag PA_FLAG_MEMFD_FD (implying that this is
indeed a marshalled memfd-block frame), but with a reset ancillary
data field (nfd = 0).

After a lot of debugging, using below patch as an example, I found
that _even in the error case_ of nfd=0, the memfd-block frame was
received correctly:

- descriptor flags field denotes a marshalled memfd block
- descriptor payload length indeed shows memfd_info array length
- ancillary data nfd = 1, with fds[0] containing our kernel-passed
  fd

But by the time the full frame is complete, [*] someone has reset
the pstreams ancillary data behind my back. Re-examining all the
descriptor fields, and descriptor payload fields (memfd_info), shows
no signs of data corruption at all.

So, who's changing the `pstream->read_ancillary_data' behind our
back? Is it the memblock shm revoke/release callbacks? This issue
does not happen all the time, but is 100%-reproducible when a
gstreamer client runs in parallel with a simple API client.

Any advice on how to debug this issue further?

Thanks a lot :-)

[*] (re->index >= ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_LENGTH]) + PA_PSTREAM_DESCRIPTOR_SIZE)

Debugging patch:

---

diff --git a/src/pulse/context.c b/src/pulse/context.c
index f272cd6..14af5e8 100644
--- a/src/pulse/context.c
+++ b/src/pulse/context.c
@@ -62,6 +62,7 @@
 #include <pulsecore/creds.h>
 #include <pulsecore/macro.h>
 #include <pulsecore/proplist-util.h>
+#include <pulsecore/memfd.h>
 
 #include "internal.h"
 #include "context.h"
@@ -125,7 +126,7 @@ static void reset_callbacks(pa_context *c) {
 
 pa_context *pa_context_new_with_proplist(pa_mainloop_api *mainloop, const char *name, pa_proplist *p) {
     pa_context *c;
-    pa_mem_type_t type;
+    pa_mem_type_t mem_type;
 
     pa_assert(mainloop);
 
@@ -171,9 +172,14 @@ pa_context *pa_context_new_with_proplist(pa_mainloop_api *mainloop, const char *
     c->srb_template.readfd = -1;
     c->srb_template.writefd = -1;
 
-    type = !c->conf->disable_shm ? PA_MEMORY_SHARED_POSIX : PA_MEMORY_PRIVATE;
-    if (!(c->mempool = pa_mempool_new(type, c->conf->shm_size))) {
+    if (c->conf->disable_shm)
+        mem_type = PA_MEMORY_PRIVATE;
+    else if (pa_memfd_is_supported())
+        mem_type = PA_MEMORY_SHARED_MEMFD;
+    else
+        mem_type = PA_MEMORY_SHARED_POSIX;
 
+    if (!(c->mempool = pa_mempool_new(mem_type, c->conf->shm_size))) {
         if (!c->conf->disable_shm) {
             pa_log_warn("Failed to allocate shared memory pool. Falling back to a normal private one.");
             c->mempool = pa_mempool_new(PA_MEMORY_PRIVATE, c->conf->shm_size);
diff --git a/src/pulsecore/pstream.c b/src/pulsecore/pstream.c
index a65b0a6..6e12221 100644
--- a/src/pulsecore/pstream.c
+++ b/src/pulsecore/pstream.c
@@ -58,6 +58,7 @@ enum {
     PA_PSTREAM_DESCRIPTOR_OFFSET_HI,
     PA_PSTREAM_DESCRIPTOR_OFFSET_LO,
     PA_PSTREAM_DESCRIPTOR_FLAGS,
+    PA_PSTREAM_DESCRIPTOR_MAGIC,
     PA_PSTREAM_DESCRIPTOR_MAX
 };
 
@@ -524,11 +525,14 @@ static void prepare_next_write_item(pa_pstream *p) {
     p->write.minibuf_validsize = 0;
     pa_memchunk_reset(&p->write.memchunk);
 
+    static pa_atomic_t magic = PA_ATOMIC_INIT(100);
+
     p->write.descriptor[PA_PSTREAM_DESCRIPTOR_LENGTH] = 0;
     p->write.descriptor[PA_PSTREAM_DESCRIPTOR_CHANNEL] = htonl((uint32_t) -1);
     p->write.descriptor[PA_PSTREAM_DESCRIPTOR_OFFSET_HI] = 0;
     p->write.descriptor[PA_PSTREAM_DESCRIPTOR_OFFSET_LO] = 0;
     p->write.descriptor[PA_PSTREAM_DESCRIPTOR_FLAGS] = 0;
+    p->write.descriptor[PA_PSTREAM_DESCRIPTOR_MAGIC] = htonl(pa_atomic_inc(&magic));
 
     if (p->write.current->type == PA_PSTREAM_ITEM_PACKET) {
         size_t plen;
@@ -826,6 +830,25 @@ static int do_read(pa_pstream *p, struct pstream_read *re) {
         if ((r = pa_iochannel_read_with_ancil_data(p->io, d, l, &b)) <= 0)
             goto fail;
 
+        /* This implies that we get the pstream descriptor out of the first read() system call.
+         * This is usually the case, so no need for making our debugging code more complicated. */
+        if (re->index == 0 && r == PA_PSTREAM_DESCRIPTOR_SIZE) {
+            int flags = ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_FLAGS]);
+            if (flags & PA_FLAG_MEMFD_FD) {
+                pa_log("@@@@@@@@");
+                pa_log("Just out of the oven memfd stream descriptor!");
+                pa_log("descriptor: MAGIC = %u", ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_MAGIC]));
+                pa_log("descriptor: size = %lu", PA_PSTREAM_DESCRIPTOR_SIZE);
+                pa_log("descriptor: flags = 0x%x", ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_FLAGS]));
+                pa_log("descriptor: paylod (memfd_info) size = %u", ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_LENGTH]));
+                pa_log("ancil: creds_valid = %s", pa_yes_no(b.creds_valid));
+                pa_log("ancil: creds uid = %u, gid = %u", b.creds.uid, b.creds.gid);
+                pa_log("ancil: nfd = %d", b.nfd);
+                pa_log("ancil: fds[0] = %d", b.fds[0]);
+                pa_log("@@@@@@@@");
+            }
+        }
+
         if (b.creds_valid) {
             p->read_ancil_data.creds_valid = true;
             p->read_ancil_data.creds = b.creds;
@@ -974,7 +997,28 @@ static int do_read(pa_pstream *p, struct pstream_read *re) {
                                          !!(flags & PA_FLAG_SHMWRITABLE));
             } else if ((flags & PA_FLAG_SHMMASK) == PA_FLAG_MEMFD_FD) {
                 int memfd_fd;
-                pa_assert(p->read_ancil_data.nfd == 1);
+
+                if (p->read_ancil_data.nfd != 1) {
+                    pa_log_warn("error; found nfd = %d", p->read_ancil_data.nfd);
+
+                    pa_log_warn("++++++++");
+                    pa_log_error("descriptor: MAGIC = %u", ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_MAGIC]));
+                    pa_log_warn("descriptor: size = %lu", PA_PSTREAM_DESCRIPTOR_SIZE);
+                    pa_log_warn("descriptor: flags = 0x%x", ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_FLAGS]));
+                    pa_log_warn("descriptor: paylod (memfd_info) size = %u", ntohl(re->descriptor[PA_PSTREAM_DESCRIPTOR_LENGTH]));
+                    pa_log_warn("------");
+                    pa_log_warn("payload: memfd blk id = %u", ntohl(re->per_type.memfd_info[PA_PSTREAM_MEMFD_BLOCKID]));
+                    pa_log_warn("payload: memfd index = %u", ntohl(re->per_type.memfd_info[PA_PSTREAM_MEMFD_INDEX]));
+                    pa_log_warn("payload: memfd shared region length = %u", ntohl(re->per_type.memfd_info[PA_PSTREAM_MEMFD_LENGTH]));
+                    pa_log_warn("------");
+                    pa_log_warn("ancil: creds_valid = %s", pa_yes_no(p->read_ancil_data.creds_valid));
+                    pa_log_warn("ancil: creds uid = %u, gid = %u", p->read_ancil_data.creds.uid, p->read_ancil_data.creds.gid);
+                    pa_log_warn("ancil: nfd = %d", p->read_ancil_data.nfd);
+                    pa_log_warn("ancil: fds[0] = %d", p->read_ancil_data.fds[0]);
+                    pa_log_warn("++++++++");
+
+                    pa_assert_not_reached();
+                }
 
                 memfd_fd = p->read_ancil_data.fds[0];
                 pa_assert(memfd_fd >= 0);


More information about the pulseaudio-discuss mailing list