[pulseaudio-discuss] [PATCH] core: New LIFO style flist implementation

Jyri Sarha oku at iki.fi
Tue Nov 16 06:01:45 PST 2010



On Fri, 20 Aug 2010, Colin Guthrie wrote:

> 'Twas brillig, and oku at iki.fi at 20/08/10 14:24 did gyre and gimble:
>> From: Jyri Sarha <jyri.sarha at nokia.com>
>>
>> It seems the mail archive at 0pointer does not show attachments of type
>> "Text/X-PATCH", so here this goes again.
>
> It's OK, it came through on gmane and no doubt to list subscribers.
>
> Lennart is probably the best person to review this as I don't feel
> confident accepting it without fully grokking the reasons for the
> original implem vs. this one.
>
> As he's currently distracted by systemd it may take him a while to
> reply, but he will defo get to it in due course :)
>

I browsed the pulseaudio-discuss mail archives and noticed that this issue 
was left hanging in the air also on my side. At least I could not find the
the latest version of this patch anywhere. So here it comes just in case
Lennart would have time to take a look at it.

To give some extra motivation, with this patch you can solve issues like 
this :) : https://bugs.maemo.org/show_bug.cgi?id=7190

Cheers,
Jyri

--------------------------------------------------------------------------
>From 12563ed356f3bb81f99aa32ba45337cfdf427b50 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jyri.sarha at nokia.com>
Date: Mon, 30 Nov 2009 16:55:27 +0200
Subject: [PATCH] core: New LIFO style flist implementation v2.2

The old free list implementation used objects in FIFO style. This is
bad because it tries keep all the objects ever used alive and in
memory. This minimizes the changes that an allocated object is already
in cache. When there is shortage of physical memory this may also
increase change that newly allocated object is swapped out. LIFO
(e.g. stack) style free list should help these issues. Like the old
one the new implementation is also lock free. This version (v2.1) of
the patch has a potential weakness fixed. The previous version (2.0)
did segfault when popping from empty flist, this does not.
---
  src/pulsecore/flist.c |  218 ++++++++++++++----------------------------------
  1 files changed, 64 insertions(+), 154 deletions(-)

diff --git a/src/pulsecore/flist.c b/src/pulsecore/flist.c
index 7e5ee24..e728d30 100644
--- a/src/pulsecore/flist.c
+++ b/src/pulsecore/flist.c
@@ -1,7 +1,9 @@
  /***
    This file is part of PulseAudio.

-  Copyright 2006-2008 Lennart Poettering
+  Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+
+  Contact: Jyri Sarha <Jyri.Sarha at nokia.com>

    PulseAudio is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
@@ -34,198 +36,106 @@

  #include "flist.h"

-/* Algorithm is not perfect, in a few corner cases it will fail to pop
- * from the flist although it isn't empty, and fail to push into the
- * flist, although it isn't full.
- *
- * We keep a fixed size array of entries, each item is an atomic
- * pointer.
- *
- * To accelerate finding of used/unused cells we maintain a read and a
- * write index which is used like a ring buffer. After each push we
- * increase the write index and after each pop we increase the read
- * index.
- *
- * The indexes are incremented atomically and are never truncated to
- * the buffer size. Instead we assume that the buffer size is a power
- * of two and that the truncation can thus be done by applying a
- * simple AND on read.
- *
- * To make sure that we do not look for empty cells indefinitely we
- * maintain a length value which stores the number of used cells. From
- * this value the number of unused cells is easily calculated. Please
- * note that the length value is not updated atomically with the read
- * and write index and might thus be a few cells off the real
- * value. To deal with this we always look for N_EXTRA_SCAN extra
- * cells when pushing/popping entries.
- *
- * It might make sense to replace this implementation with a link list
- * stack or queue, which however requires DCAS to be simple. Patches
- * welcome.
- *
- * Please note that this algorithm is home grown.*/
-
  #define FLIST_SIZE 128
-#define N_EXTRA_SCAN 3

-/* For debugging purposes we can define _Y to put and extra thread
- * yield between each operation. */
+/* Lock free single linked list element. */
+struct pa_flist_elem {
+    pa_atomic_ptr_t next;
+    pa_atomic_ptr_t ptr;
+};

-#ifdef PROFILE
-#define _Y pa_thread_yield()
-#else
-#define _Y do { } while(0)
-#endif
+typedef struct pa_flist_elem pa_flist_elem;

  struct pa_flist {
      unsigned size;
-    pa_atomic_t length;
-    pa_atomic_t read_idx;
-    pa_atomic_t write_idx;
+    /* Stack that contains pointers stored into free list */
+    pa_atomic_ptr_t stored;
+    /* Stack that contains empty list elements */
+    pa_atomic_ptr_t empty;
+    pa_flist_elem table[0];
  };

-#define PA_FLIST_CELLS(x) ((pa_atomic_ptr_t*) ((uint8_t*) (x) + PA_ALIGN(sizeof(struct pa_flist))))
+/* Lock free pop from linked list stack */
+static pa_flist_elem *stack_pop(pa_atomic_ptr_t *list) {
+    pa_flist_elem *poped;
+    pa_assert(list);
+
+    do {
+        poped = (pa_flist_elem *) pa_atomic_ptr_load(list);
+    } while (poped != NULL && !pa_atomic_ptr_cmpxchg(list, poped, pa_atomic_ptr_load(&poped->next)));
+
+    return poped;
+}
+
+/* Lock free push to linked list stack */
+static void stack_push(pa_atomic_ptr_t *list, pa_flist_elem *new_elem) {
+    pa_flist_elem *next;
+    pa_assert(list);
+
+    do {
+        next = pa_atomic_ptr_load(list);
+        pa_atomic_ptr_store(&new_elem->next, next);
+    } while (!pa_atomic_ptr_cmpxchg(list, next, new_elem));
+}

  pa_flist *pa_flist_new(unsigned size) {
      pa_flist *l;
+    unsigned i;

      if (!size)
          size = FLIST_SIZE;

-    pa_assert(pa_is_power_of_two(size));
-
-    l = pa_xmalloc0(PA_ALIGN(sizeof(pa_flist)) + (sizeof(pa_atomic_ptr_t) * size));
+    l = pa_xmalloc0(sizeof(pa_flist) + sizeof(pa_flist_elem) * size);

      l->size = size;
-
-    pa_atomic_store(&l->read_idx, 0);
-    pa_atomic_store(&l->write_idx, 0);
-    pa_atomic_store(&l->length, 0);
-
+    pa_atomic_ptr_store(&l->stored, NULL);
+    pa_atomic_ptr_store(&l->empty, NULL);
+    for (i=0; i < size; i++) {
+        stack_push(&l->empty, &l->table[i]);
+    }
      return l;
  }

-static unsigned reduce(pa_flist *l, unsigned value) {
-    return value & (l->size - 1);
-}
-
  void pa_flist_free(pa_flist *l, pa_free_cb_t free_cb) {
      pa_assert(l);

      if (free_cb) {
-        pa_atomic_ptr_t*cells;
-        unsigned idx;
-
-        cells = PA_FLIST_CELLS(l);
-
-        for (idx = 0; idx < l->size; idx ++) {
-            void *p;
-
-            if ((p = pa_atomic_ptr_load(&cells[idx])))
-                free_cb(p);
-        }
+        pa_flist_elem *elem;
+        while((elem = stack_pop(&l->stored)))
+            free_cb(pa_atomic_ptr_load(&elem->ptr));
      }

      pa_xfree(l);
  }

-int pa_flist_push(pa_flist*l, void *p) {
-    unsigned idx, n;
-    pa_atomic_ptr_t*cells;
-#ifdef PROFILE
-    unsigned len;
-#endif
-
+int pa_flist_push(pa_flist *l, void *p) {
+    pa_flist_elem *elem;
      pa_assert(l);
      pa_assert(p);

-    cells = PA_FLIST_CELLS(l);
-
-    n = l->size + N_EXTRA_SCAN - (unsigned) pa_atomic_load(&l->length);
-
-#ifdef PROFILE
-    len = n;
-#endif
-
-    _Y;
-    idx = reduce(l, (unsigned) pa_atomic_load(&l->write_idx));
-
-    for (; n > 0 ; n--) {
-
-        _Y;
-
-        if (pa_atomic_ptr_cmpxchg(&cells[idx], NULL, p)) {
-
-            _Y;
-            pa_atomic_inc(&l->write_idx);
-
-            _Y;
-            pa_atomic_inc(&l->length);
-
-            return 0;
-        }
-
-        _Y;
-        idx = reduce(l, idx + 1);
+    elem = stack_pop(&l->empty);
+    if (elem == NULL) {
+        pa_log_warn("flist is full");
+        return -1;
      }
+    pa_atomic_ptr_store(&elem->ptr, p);
+    stack_push(&l->stored, elem);

-#ifdef PROFILE
-    if (len > N_EXTRA_SCAN)
-        pa_log_warn("Didn't  find free cell after %u iterations.", len);
-#endif
-
-    return -1;
+    return 0;
  }

-void* pa_flist_pop(pa_flist*l) {
-    unsigned idx, n;
-    pa_atomic_ptr_t *cells;
-#ifdef PROFILE
-    unsigned len;
-#endif
-
+void* pa_flist_pop(pa_flist *l) {
+    pa_flist_elem *elem;
+    void *ptr;
      pa_assert(l);

-    cells = PA_FLIST_CELLS(l);
-
-    n = (unsigned) pa_atomic_load(&l->length) + N_EXTRA_SCAN;
+    elem = stack_pop(&l->stored);
+    if (elem == NULL)
+        return NULL;

-#ifdef PROFILE
-    len = n;
-#endif
-
-    _Y;
-    idx = reduce(l, (unsigned) pa_atomic_load(&l->read_idx));
-
-    for (; n > 0 ; n--) {
-        void *p;
-
-        _Y;
-        p = pa_atomic_ptr_load(&cells[idx]);
-
-        if (p) {
-
-            _Y;
-            if (!pa_atomic_ptr_cmpxchg(&cells[idx], p, NULL))
-                continue;
+    ptr = pa_atomic_ptr_load(&elem->ptr);

-            _Y;
-            pa_atomic_inc(&l->read_idx);
-
-            _Y;
-            pa_atomic_dec(&l->length);
-
-            return p;
-        }
-
-        _Y;
-        idx = reduce(l, idx+1);
-    }
-
-#ifdef PROFILE
-    if (len > N_EXTRA_SCAN)
-        pa_log_warn("Didn't find used cell after %u iterations.", len);
-#endif
+    stack_push(&l->empty, elem);

-    return NULL;
+    return ptr;
  }
-- 
1.7.1


-------------- next part --------------
From 12563ed356f3bb81f99aa32ba45337cfdf427b50 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jyri.sarha at nokia.com>
Date: Mon, 30 Nov 2009 16:55:27 +0200
Subject: [PATCH] core: New LIFO style flist implementation v2.2

The old free list implementation used objects in FIFO style. This is
bad because it tries keep all the objects ever used alive and in
memory. This minimizes the changes that an allocated object is already
in cache. When there is shortage of physical memory this may also
increase change that newly allocated object is swapped out. LIFO
(e.g. stack) style free list should help these issues. Like the old
one the new implementation is also lock free. This version (v2.1) of
the patch has a potential weakness fixed. The previous version (2.0)
did segfault when popping from empty flist, this does not.
---
 src/pulsecore/flist.c |  218 ++++++++++++++----------------------------------
 1 files changed, 64 insertions(+), 154 deletions(-)

diff --git a/src/pulsecore/flist.c b/src/pulsecore/flist.c
index 7e5ee24..e728d30 100644
--- a/src/pulsecore/flist.c
+++ b/src/pulsecore/flist.c
@@ -1,7 +1,9 @@
 /***
   This file is part of PulseAudio.
 
-  Copyright 2006-2008 Lennart Poettering
+  Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+
+  Contact: Jyri Sarha <Jyri.Sarha at nokia.com>
 
   PulseAudio is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
@@ -34,198 +36,106 @@
 
 #include "flist.h"
 
-/* Algorithm is not perfect, in a few corner cases it will fail to pop
- * from the flist although it isn't empty, and fail to push into the
- * flist, although it isn't full.
- *
- * We keep a fixed size array of entries, each item is an atomic
- * pointer.
- *
- * To accelerate finding of used/unused cells we maintain a read and a
- * write index which is used like a ring buffer. After each push we
- * increase the write index and after each pop we increase the read
- * index.
- *
- * The indexes are incremented atomically and are never truncated to
- * the buffer size. Instead we assume that the buffer size is a power
- * of two and that the truncation can thus be done by applying a
- * simple AND on read.
- *
- * To make sure that we do not look for empty cells indefinitely we
- * maintain a length value which stores the number of used cells. From
- * this value the number of unused cells is easily calculated. Please
- * note that the length value is not updated atomically with the read
- * and write index and might thus be a few cells off the real
- * value. To deal with this we always look for N_EXTRA_SCAN extra
- * cells when pushing/popping entries.
- *
- * It might make sense to replace this implementation with a link list
- * stack or queue, which however requires DCAS to be simple. Patches
- * welcome.
- *
- * Please note that this algorithm is home grown.*/
-
 #define FLIST_SIZE 128
-#define N_EXTRA_SCAN 3
 
-/* For debugging purposes we can define _Y to put and extra thread
- * yield between each operation. */
+/* Lock free single linked list element. */
+struct pa_flist_elem {
+    pa_atomic_ptr_t next;
+    pa_atomic_ptr_t ptr;
+};
 
-#ifdef PROFILE
-#define _Y pa_thread_yield()
-#else
-#define _Y do { } while(0)
-#endif
+typedef struct pa_flist_elem pa_flist_elem;
 
 struct pa_flist {
     unsigned size;
-    pa_atomic_t length;
-    pa_atomic_t read_idx;
-    pa_atomic_t write_idx;
+    /* Stack that contains pointers stored into free list */
+    pa_atomic_ptr_t stored;
+    /* Stack that contains empty list elements */
+    pa_atomic_ptr_t empty;
+    pa_flist_elem table[0];
 };
 
-#define PA_FLIST_CELLS(x) ((pa_atomic_ptr_t*) ((uint8_t*) (x) + PA_ALIGN(sizeof(struct pa_flist))))
+/* Lock free pop from linked list stack */
+static pa_flist_elem *stack_pop(pa_atomic_ptr_t *list) {
+    pa_flist_elem *poped;
+    pa_assert(list);
+
+    do {
+        poped = (pa_flist_elem *) pa_atomic_ptr_load(list);
+    } while (poped != NULL && !pa_atomic_ptr_cmpxchg(list, poped, pa_atomic_ptr_load(&poped->next)));
+
+    return poped;
+}
+
+/* Lock free push to linked list stack */
+static void stack_push(pa_atomic_ptr_t *list, pa_flist_elem *new_elem) {
+    pa_flist_elem *next;
+    pa_assert(list);
+
+    do {
+        next = pa_atomic_ptr_load(list);
+        pa_atomic_ptr_store(&new_elem->next, next);
+    } while (!pa_atomic_ptr_cmpxchg(list, next, new_elem));
+}
 
 pa_flist *pa_flist_new(unsigned size) {
     pa_flist *l;
+    unsigned i;
 
     if (!size)
         size = FLIST_SIZE;
 
-    pa_assert(pa_is_power_of_two(size));
-
-    l = pa_xmalloc0(PA_ALIGN(sizeof(pa_flist)) + (sizeof(pa_atomic_ptr_t) * size));
+    l = pa_xmalloc0(sizeof(pa_flist) + sizeof(pa_flist_elem) * size);
 
     l->size = size;
-
-    pa_atomic_store(&l->read_idx, 0);
-    pa_atomic_store(&l->write_idx, 0);
-    pa_atomic_store(&l->length, 0);
-
+    pa_atomic_ptr_store(&l->stored, NULL);
+    pa_atomic_ptr_store(&l->empty, NULL);
+    for (i=0; i < size; i++) {
+        stack_push(&l->empty, &l->table[i]);
+    }
     return l;
 }
 
-static unsigned reduce(pa_flist *l, unsigned value) {
-    return value & (l->size - 1);
-}
-
 void pa_flist_free(pa_flist *l, pa_free_cb_t free_cb) {
     pa_assert(l);
 
     if (free_cb) {
-        pa_atomic_ptr_t*cells;
-        unsigned idx;
-
-        cells = PA_FLIST_CELLS(l);
-
-        for (idx = 0; idx < l->size; idx ++) {
-            void *p;
-
-            if ((p = pa_atomic_ptr_load(&cells[idx])))
-                free_cb(p);
-        }
+        pa_flist_elem *elem;
+        while((elem = stack_pop(&l->stored)))
+            free_cb(pa_atomic_ptr_load(&elem->ptr));
     }
 
     pa_xfree(l);
 }
 
-int pa_flist_push(pa_flist*l, void *p) {
-    unsigned idx, n;
-    pa_atomic_ptr_t*cells;
-#ifdef PROFILE
-    unsigned len;
-#endif
-
+int pa_flist_push(pa_flist *l, void *p) {
+    pa_flist_elem *elem;
     pa_assert(l);
     pa_assert(p);
 
-    cells = PA_FLIST_CELLS(l);
-
-    n = l->size + N_EXTRA_SCAN - (unsigned) pa_atomic_load(&l->length);
-
-#ifdef PROFILE
-    len = n;
-#endif
-
-    _Y;
-    idx = reduce(l, (unsigned) pa_atomic_load(&l->write_idx));
-
-    for (; n > 0 ; n--) {
-
-        _Y;
-
-        if (pa_atomic_ptr_cmpxchg(&cells[idx], NULL, p)) {
-
-            _Y;
-            pa_atomic_inc(&l->write_idx);
-
-            _Y;
-            pa_atomic_inc(&l->length);
-
-            return 0;
-        }
-
-        _Y;
-        idx = reduce(l, idx + 1);
+    elem = stack_pop(&l->empty);
+    if (elem == NULL) {
+        pa_log_warn("flist is full");
+        return -1;
     }
+    pa_atomic_ptr_store(&elem->ptr, p);
+    stack_push(&l->stored, elem);
 
-#ifdef PROFILE
-    if (len > N_EXTRA_SCAN)
-        pa_log_warn("Didn't  find free cell after %u iterations.", len);
-#endif
-
-    return -1;
+    return 0;
 }
 
-void* pa_flist_pop(pa_flist*l) {
-    unsigned idx, n;
-    pa_atomic_ptr_t *cells;
-#ifdef PROFILE
-    unsigned len;
-#endif
-
+void* pa_flist_pop(pa_flist *l) {
+    pa_flist_elem *elem;
+    void *ptr;
     pa_assert(l);
 
-    cells = PA_FLIST_CELLS(l);
-
-    n = (unsigned) pa_atomic_load(&l->length) + N_EXTRA_SCAN;
+    elem = stack_pop(&l->stored);
+    if (elem == NULL)
+        return NULL;
 
-#ifdef PROFILE
-    len = n;
-#endif
-
-    _Y;
-    idx = reduce(l, (unsigned) pa_atomic_load(&l->read_idx));
-
-    for (; n > 0 ; n--) {
-        void *p;
-
-        _Y;
-        p = pa_atomic_ptr_load(&cells[idx]);
-
-        if (p) {
-
-            _Y;
-            if (!pa_atomic_ptr_cmpxchg(&cells[idx], p, NULL))
-                continue;
+    ptr = pa_atomic_ptr_load(&elem->ptr);
 
-            _Y;
-            pa_atomic_inc(&l->read_idx);
-
-            _Y;
-            pa_atomic_dec(&l->length);
-
-            return p;
-        }
-
-        _Y;
-        idx = reduce(l, idx+1);
-    }
-
-#ifdef PROFILE
-    if (len > N_EXTRA_SCAN)
-        pa_log_warn("Didn't find used cell after %u iterations.", len);
-#endif
+    stack_push(&l->empty, elem);
 
-    return NULL;
+    return ptr;
 }
-- 
1.7.1



More information about the pulseaudio-discuss mailing list