[Mesa-dev] [PATCH v2 6/8] u_queue: add a futex-based implementation of fences

Nicolai Hähnle nhaehnle at gmail.com
Fri Nov 3 20:09:15 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Fences are now 4 bytes instead of 96 bytes (on my 64-bit system).

Signaling a fence is a single atomic operation in the fast case (no
waiters); the slow case (there may be waiters) adds one futex wake syscall.

Testing if a fence is signaled is the same as before (a simple comparison),
but waiting on a fence is now no more expensive than just testing it in
the fast (already signaled) case.

v2:
- style fixes
- use p_atomic_xxx macros with the right barriers
---
 src/util/futex.h   |  5 +++
 src/util/u_queue.c |  2 ++
 src/util/u_queue.h | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+)

diff --git a/src/util/futex.h b/src/util/futex.h
index 142c3b62f00..fa42cf4cf59 100644
--- a/src/util/futex.h
+++ b/src/util/futex.h
@@ -36,18 +36,23 @@
 static inline long sys_futex(void *addr1, int op, int val1, struct timespec *timeout, void *addr2, int val3)
 {
    return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
 }
 
 static inline int futex_wake(uint32_t *addr)
 {
    return sys_futex(addr, FUTEX_WAKE, 1, NULL, NULL, 0);
 }
 
+static inline int futex_wake_all(uint32_t *addr)
+{
+   return sys_futex(addr, FUTEX_WAKE, INT_MAX, NULL, NULL, 0); /* count is INT_MAX (futex_wake passes 1): wake every waiter on addr */
+}
+
 static inline int futex_wait(uint32_t *addr, int32_t value)
 {
    return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0);
 }
 
 #endif
 
 #endif /* UTIL_FUTEX_H */
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 2272006042f..8293ec661b0 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -82,20 +82,21 @@ remove_from_atexit_list(struct util_queue *queue)
          break;
       }
    }
    mtx_unlock(&exit_mutex);
 }
 
 /****************************************************************************
  * util_queue_fence
  */
 
+#ifdef UTIL_QUEUE_FENCE_STANDARD
 void
 util_queue_fence_signal(struct util_queue_fence *fence)
 {
    mtx_lock(&fence->mutex);
    fence->signalled = true;
    cnd_broadcast(&fence->cond);
    mtx_unlock(&fence->mutex);
 }
 
 void
@@ -129,20 +130,21 @@ util_queue_fence_destroy(struct util_queue_fence *fence)
     * by thread A, thread B is allowed to destroy it. Since
     * util_queue_fence_is_signalled does not lock the fence mutex (for
     * performance reasons), we must do so here.
     */
    mtx_lock(&fence->mutex);
    mtx_unlock(&fence->mutex);
 
    cnd_destroy(&fence->cond);
    mtx_destroy(&fence->mutex);
 }
+#endif
 
 /****************************************************************************
  * util_queue implementation
  */
 
 struct thread_input {
    struct util_queue *queue;
    int thread_index;
 };
 
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index a3e12260e30..be8e715899a 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -28,30 +28,121 @@
  *
  * Jobs can be added from any thread. After that, the wait call can be used
  * to wait for completion of the job.
  */
 
 #ifndef U_QUEUE_H
 #define U_QUEUE_H
 
 #include <string.h>
 
+#include "util/futex.h"
 #include "util/list.h"
+#include "util/macros.h"
+#include "util/u_atomic.h"
 #include "util/u_thread.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 #define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY      (1 << 0)
 #define UTIL_QUEUE_INIT_RESIZE_IF_FULL            (1 << 1)
 
+#if defined(__GNUC__) && defined(HAVE_FUTEX)
+#define UTIL_QUEUE_FENCE_FUTEX /* selects the 32-bit futex-word fence below */
+#else
+#define UTIL_QUEUE_FENCE_STANDARD /* selects the mtx_t/cnd_t based fence */
+#endif
+
+#ifdef UTIL_QUEUE_FENCE_FUTEX
+/* Job completion fence (futex-based variant: a single 32-bit futex word).
+ * Put this into your job structure.
+ */
+struct util_queue_fence {
+   /* The fence can be in one of three states:
+    *  0 - signaled (set by init and signal; wait returns immediately)
+    *  1 - unsignaled (set by reset; no waiter has announced itself yet)
+    *  2 - unsignaled, may have waiters (set by wait; signal must wake them)
+    */
+   uint32_t val;
+};
+
+static inline void
+util_queue_fence_init(struct util_queue_fence *fence)
+{
+   fence->val = 0; /* 0 is the signaled state: a new fence starts out signaled */
+}
+
+static inline void
+util_queue_fence_destroy(struct util_queue_fence *fence)
+{
+   assert(fence->val == 0); /* debug check: the fence must be signaled when destroyed */
+   /* no-op: the fence is a plain uint32_t, there is nothing to release */
+}
+
+static inline void
+util_queue_fence_wait(struct util_queue_fence *fence)
+{
+   uint32_t v = fence->val; /* plain (non-atomic) snapshot of the fence state */
+
+   if (likely(v == 0)) /* fast path: already signaled, no atomic op and no syscall */
+      return;
+
+   do {
+      if (v != 2) { /* state 2 not observed yet: announce that there is a waiter */
+         v = p_atomic_cmpxchg(&fence->val, 1, 2); /* try 1 -> 2; v presumably receives the value found -- confirm in u_atomic.h */
+         if (v == 0) /* the fence was signaled in the meantime */
+            return;
+      }
+
+      futex_wait(&fence->val, 2); /* FUTEX_WAIT with expected value 2; may return while still unsignaled */
+      v = fence->val; /* re-read the state and loop until it is 0 (signaled) */
+   } while(v != 0);
+}
+
+static inline void
+util_queue_fence_signal(struct util_queue_fence *fence)
+{
+   uint32_t val = p_atomic_xchg(&fence->val, 0); /* store 0 (signaled); val presumably receives the previous state */
+
+   assert(val != 0); /* debug check: the fence must not already be signaled */
+
+   if (val == 2) /* only state 2 may have sleeping waiters; state 1 skips the syscall */
+      futex_wake_all(&fence->val);
+}
+
+/**
+ * Move \p fence back into unsignalled state (val = 1, no waiters recorded).
+ *
+ * \warning The caller must ensure that no other thread may currently be
+ *          waiting (or about to wait) on the fence.
+ */
+static inline void
+util_queue_fence_reset(struct util_queue_fence *fence)
+{
+#ifdef NDEBUG
+   fence->val = 1; /* release build: a plain store is enough */
+#else
+   uint32_t v = p_atomic_xchg(&fence->val, 1); /* debug build: exchange so the old state can be checked */
+   assert(v == 0); /* resetting is only valid on a signaled fence */
+#endif
+}
+
+static inline bool
+util_queue_fence_is_signalled(struct util_queue_fence *fence)
+{
+   return fence->val == 0; /* plain read; 0 is the signaled state */
+}
+#endif
+
+#ifdef UTIL_QUEUE_FENCE_STANDARD
 /* Job completion fence.
  * Put this into your job structure.
  */
 struct util_queue_fence {
    mtx_t mutex;
    cnd_t cond;
    int signalled;
 };
 
 void util_queue_fence_init(struct util_queue_fence *fence);
@@ -70,20 +161,21 @@ util_queue_fence_reset(struct util_queue_fence *fence)
 {
    assert(fence->signalled);
    fence->signalled = 0;
 }
 
 static inline bool
 util_queue_fence_is_signalled(struct util_queue_fence *fence)
 {
    return fence->signalled != 0;
 }
+#endif
 
 typedef void (*util_queue_execute_func)(void *job, int thread_index);
 
 struct util_queue_job {
    void *job;
    struct util_queue_fence *fence;
    util_queue_execute_func execute;
    util_queue_execute_func cleanup;
 };
 
-- 
2.11.0



More information about the mesa-dev mailing list