[Mesa-dev] [PATCH 5/7] u_queue: add a futex-based implementation of fences
Nicolai Hähnle
nhaehnle at gmail.com
Sun Oct 22 18:33:42 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Fences are now 4 bytes instead of 96 bytes (on my 64-bit system).
Signaling a fence is a single atomic operation in the fast case (no thread
is waiting on the fence), plus a futex wake syscall in the slow case (a
thread may be waiting).
Testing if a fence is signaled is the same as before (a simple comparison),
but waiting on a fence is now no more expensive than just testing it in
the fast (already signaled) case.
---
src/util/futex.h | 4 +++
src/util/u_queue.c | 2 ++
src/util/u_queue.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 94 insertions(+)
diff --git a/src/util/futex.h b/src/util/futex.h
index a79daaf209b..6b306bb78eb 100644
--- a/src/util/futex.h
+++ b/src/util/futex.h
@@ -35,17 +35,21 @@
static inline long sys_futex(void *addr1, int op, int val1, struct timespec *timeout, void *addr2, int val3)
{
return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
}
static inline int futex_wake(uint32_t *addr) {
return sys_futex(addr, FUTEX_WAKE, 1, NULL, NULL, 0);
}
+/* Wake every thread currently blocked in futex_wait() on addr.
+ * Returns the result of the underlying futex syscall.
+ */
+static inline int futex_wake_all(uint32_t *addr) {
+   /* FUTEX_WAKE wakes at most this many waiters; INT_MAX means "all". */
+   const int max_waiters = INT_MAX;
+
+   return sys_futex(addr, FUTEX_WAKE, max_waiters, NULL, NULL, 0);
+}
+
static inline int futex_wait(uint32_t *addr, int32_t value) {
return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0);
}
#endif
#endif /* UTIL_FUTEX_H */
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 2272006042f..8293ec661b0 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -82,20 +82,21 @@ remove_from_atexit_list(struct util_queue *queue)
break;
}
}
mtx_unlock(&exit_mutex);
}
/****************************************************************************
* util_queue_fence
*/
+#ifdef UTIL_QUEUE_FENCE_STANDARD
void
util_queue_fence_signal(struct util_queue_fence *fence)
{
mtx_lock(&fence->mutex);
fence->signalled = true;
cnd_broadcast(&fence->cond);
mtx_unlock(&fence->mutex);
}
void
@@ -129,20 +130,21 @@ util_queue_fence_destroy(struct util_queue_fence *fence)
* by thread A, thread B is allowed to destroy it. Since
* util_queue_fence_is_signalled does not lock the fence mutex (for
* performance reasons), we must do so here.
*/
mtx_lock(&fence->mutex);
mtx_unlock(&fence->mutex);
cnd_destroy(&fence->cond);
mtx_destroy(&fence->mutex);
}
+#endif
/****************************************************************************
* util_queue implementation
*/
struct thread_input {
struct util_queue *queue;
int thread_index;
};
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index a3e12260e30..3d9f19f4e6c 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -28,30 +28,117 @@
*
* Jobs can be added from any thread. After that, the wait call can be used
* to wait for completion of the job.
*/
#ifndef U_QUEUE_H
#define U_QUEUE_H
#include <string.h>
+#include "util/futex.h"
#include "util/list.h"
+#include "util/macros.h"
#include "util/u_thread.h"
#ifdef __cplusplus
extern "C" {
#endif
#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY (1 << 0)
#define UTIL_QUEUE_INIT_RESIZE_IF_FULL (1 << 1)
+/* Select the fence implementation. The futex-based fence relies on the GCC
+ * __sync atomic builtins and on futex support (HAVE_FUTEX); when either is
+ * missing, fall back to the mutex/condition-variable based fence.
+ * Exactly one of the two macros below ends up defined.
+ */
+#if defined(__GNUC__) && defined(HAVE_FUTEX)
+#define UTIL_QUEUE_FENCE_FUTEX
+#else
+#define UTIL_QUEUE_FENCE_STANDARD
+#endif
+
+#ifdef UTIL_QUEUE_FENCE_FUTEX
+/* Job completion fence.
+ * Put this into your job structure.
+ *
+ * The fence consists of a single 32-bit word, which is also the word passed
+ * to the futex wait/wake calls.
+ */
+struct util_queue_fence {
+ /* The fence can be in one of three states:
+ * 0 - signaled
+ * 1 - unsignaled
+ * 2 - unsignaled, may have waiters
+ *
+ * A waiter moves the fence from 1 to 2 before sleeping, so that signaling
+ * only needs to issue a wake-up when the previous state was 2.
+ */
+ uint32_t val;
+};
+
+/**
+ * Initialize \p fence in the signaled state (val == 0).
+ */
+static inline void
+util_queue_fence_init(struct util_queue_fence *fence)
+{
+ fence->val = 0;
+}
+
+/**
+ * Destroy \p fence.
+ *
+ * The futex-based fence holds nothing but a 32-bit word, so there is nothing
+ * to release; the assertion only checks that the fence is in the signaled
+ * state when it is destroyed.
+ */
+static inline void
+util_queue_fence_destroy(struct util_queue_fence *fence)
+{
+ assert(fence->val == 0);
+ /* no-op */
+}
+
+/**
+ * Block the calling thread until \p fence is in the signaled state.
+ *
+ * Returns immediately, without any atomic operation or syscall, when the
+ * fence is already signaled.
+ */
+static inline void
+util_queue_fence_wait(struct util_queue_fence *fence)
+{
+   uint32_t v = fence->val;
+
+   /* Fast path: already signaled. */
+   if (likely(v == 0))
+      return;
+
+   do {
+      if (v != 2) {
+         /* Move 1 -> 2 to announce a waiter, so that the signaling thread
+          * knows it has to issue a wake-up.
+          */
+         v = __sync_val_compare_and_swap(&fence->val, 1, 2);
+
+         /* The fence got signaled before we could register as a waiter.
+          * Return right away instead of entering a futex syscall that
+          * would only fail because the word no longer reads 2.
+          */
+         if (v == 0)
+            return;
+      }
+
+      /* Sleeps only while the word still reads 2; otherwise the call
+       * returns immediately and the state is re-examined below.
+       */
+      futex_wait(&fence->val, 2);
+      v = fence->val;
+   } while (v != 0);
+}
+
+/**
+ * Move \p fence into the signaled state and wake up every thread that is
+ * waiting on it.
+ *
+ * The fence must currently be unsignaled.
+ */
+static inline void
+util_queue_fence_signal(struct util_queue_fence *fence)
+{
+   /* Atomically store the signaled state (0) and fetch the previous state.
+    * AND-ing with 0 serves as the exchange because __sync_fetch_and_and is
+    * a full barrier, whereas __sync_lock_test_and_set is only an acquire
+    * barrier (and some targets only support storing 1 with it). Everything
+    * written before signaling has to be visible to a thread that observes
+    * the fence as signaled.
+    */
+   uint32_t val = __sync_fetch_and_and(&fence->val, 0);
+
+   assert(val != 0);
+
+   /* Only state 2 can have sleeping waiters; skip the syscall otherwise. */
+   if (val == 2)
+      futex_wake_all(&fence->val);
+}
+
+/**
+ * Return \p fence to the unsignaled state so that it can be reused.
+ *
+ * \warning No other thread may be waiting on the fence, or be about to wait
+ * on it, while this is called; the caller has to guarantee that.
+ */
+static inline void
+util_queue_fence_reset(struct util_queue_fence *fence)
+{
+#ifndef NDEBUG
+   /* Debug builds swap the new state in atomically so that the previous
+    * state can be checked: only a signaled fence may be reset.
+    */
+   uint32_t prev = __sync_lock_test_and_set(&fence->val, 1);
+   assert(prev == 0);
+#else
+   /* Release builds simply store the unsignaled state (1). */
+   fence->val = 1;
+#endif
+}
+
+/**
+ * Return whether \p fence is currently in the signaled state.
+ *
+ * This only reads the fence word; it never blocks.
+ */
+static inline bool
+util_queue_fence_is_signalled(struct util_queue_fence *fence)
+{
+   /* 0 is the only signaled state; 1 and 2 both mean "unsignaled". */
+   return !fence->val;
+}
+#endif
+
+#ifdef UTIL_QUEUE_FENCE_STANDARD
/* Job completion fence.
* Put this into your job structure.
*/
struct util_queue_fence {
mtx_t mutex;
cnd_t cond;
int signalled;
};
void util_queue_fence_init(struct util_queue_fence *fence);
@@ -70,20 +157,21 @@ util_queue_fence_reset(struct util_queue_fence *fence)
{
assert(fence->signalled);
fence->signalled = 0;
}
static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
return fence->signalled != 0;
}
+#endif
typedef void (*util_queue_execute_func)(void *job, int thread_index);
struct util_queue_job {
void *job;
struct util_queue_fence *fence;
util_queue_execute_func execute;
util_queue_execute_func cleanup;
};
--
2.11.0
More information about the mesa-dev
mailing list