[Mesa-dev] [PATCH v2 6/8] u_queue: add a futex-based implementation of fences

Marek Olšák maraeo at gmail.com
Mon Nov 6 15:40:00 UTC 2017


Acked-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Fri, Nov 3, 2017 at 9:09 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Fences are now 4 bytes instead of 96 bytes (on my 64-bit system).
>
> Signaling a fence is a single atomic operation in the fast case, plus a
> futex wake syscall in the slow case (when the fence may have waiters).
>
> Testing whether a fence is signaled is the same as before (a simple
> comparison), but in the fast (already signaled) case, waiting on a fence
> is now no more expensive than just testing it.
>
> v2:
> - style fixes
> - use p_atomic_xxx macros with the right barriers
> ---
>  src/util/futex.h   |  5 +++
>  src/util/u_queue.c |  2 ++
>  src/util/u_queue.h | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 99 insertions(+)
>
> diff --git a/src/util/futex.h b/src/util/futex.h
> index 142c3b62f00..fa42cf4cf59 100644
> --- a/src/util/futex.h
> +++ b/src/util/futex.h
> @@ -36,18 +36,23 @@
>  static inline long sys_futex(void *addr1, int op, int val1, struct timespec *timeout, void *addr2, int val3)
>  {
>     return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
>  }
>
>  static inline int futex_wake(uint32_t *addr)
>  {
>     return sys_futex(addr, FUTEX_WAKE, 1, NULL, NULL, 0);
>  }
>
> +static inline int futex_wake_all(uint32_t *addr)
> +{
> +   return sys_futex(addr, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
> +}
> +
>  static inline int futex_wait(uint32_t *addr, int32_t value)
>  {
>     return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0);
>  }
>
>  #endif
>
>  #endif /* UTIL_FUTEX_H */
> diff --git a/src/util/u_queue.c b/src/util/u_queue.c
> index 2272006042f..8293ec661b0 100644
> --- a/src/util/u_queue.c
> +++ b/src/util/u_queue.c
> @@ -82,20 +82,21 @@ remove_from_atexit_list(struct util_queue *queue)
>           break;
>        }
>     }
>     mtx_unlock(&exit_mutex);
>  }
>
>  /****************************************************************************
>   * util_queue_fence
>   */
>
> +#ifdef UTIL_QUEUE_FENCE_STANDARD
>  void
>  util_queue_fence_signal(struct util_queue_fence *fence)
>  {
>     mtx_lock(&fence->mutex);
>     fence->signalled = true;
>     cnd_broadcast(&fence->cond);
>     mtx_unlock(&fence->mutex);
>  }
>
>  void
> @@ -129,20 +130,21 @@ util_queue_fence_destroy(struct util_queue_fence *fence)
>      * by thread A, thread B is allowed to destroy it. Since
>      * util_queue_fence_is_signalled does not lock the fence mutex (for
>      * performance reasons), we must do so here.
>      */
>     mtx_lock(&fence->mutex);
>     mtx_unlock(&fence->mutex);
>
>     cnd_destroy(&fence->cond);
>     mtx_destroy(&fence->mutex);
>  }
> +#endif
>
>  /****************************************************************************
>   * util_queue implementation
>   */
>
>  struct thread_input {
>     struct util_queue *queue;
>     int thread_index;
>  };
>
> diff --git a/src/util/u_queue.h b/src/util/u_queue.h
> index a3e12260e30..be8e715899a 100644
> --- a/src/util/u_queue.h
> +++ b/src/util/u_queue.h
> @@ -28,30 +28,121 @@
>   *
>   * Jobs can be added from any thread. After that, the wait call can be used
>   * to wait for completion of the job.
>   */
>
>  #ifndef U_QUEUE_H
>  #define U_QUEUE_H
>
>  #include <string.h>
>
> +#include "util/futex.h"
>  #include "util/list.h"
> +#include "util/macros.h"
> +#include "util/u_atomic.h"
>  #include "util/u_thread.h"
>
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
>
>  #define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY      (1 << 0)
>  #define UTIL_QUEUE_INIT_RESIZE_IF_FULL            (1 << 1)
>
> +#if defined(__GNUC__) && defined(HAVE_FUTEX)
> +#define UTIL_QUEUE_FENCE_FUTEX
> +#else
> +#define UTIL_QUEUE_FENCE_STANDARD
> +#endif
> +
> +#ifdef UTIL_QUEUE_FENCE_FUTEX
> +/* Job completion fence.
> + * Put this into your job structure.
> + */
> +struct util_queue_fence {
> +   /* The fence can be in one of three states:
> +    *  0 - signaled
> +    *  1 - unsignaled
> +    *  2 - unsignaled, may have waiters
> +    */
> +   uint32_t val;
> +};
> +
> +static inline void
> +util_queue_fence_init(struct util_queue_fence *fence)
> +{
> +   fence->val = 0;
> +}
> +
> +static inline void
> +util_queue_fence_destroy(struct util_queue_fence *fence)
> +{
> +   assert(fence->val == 0);
> +   /* no-op */
> +}
> +
> +static inline void
> +util_queue_fence_wait(struct util_queue_fence *fence)
> +{
> +   uint32_t v = fence->val;
> +
> +   if (likely(v == 0))
> +      return;
> +
> +   do {
> +      if (v != 2) {
> +         v = p_atomic_cmpxchg(&fence->val, 1, 2);
> +         if (v == 0)
> +            return;
> +      }
> +
> +      futex_wait(&fence->val, 2);
> +      v = fence->val;
> +   } while(v != 0);
> +}
> +
> +static inline void
> +util_queue_fence_signal(struct util_queue_fence *fence)
> +{
> +   uint32_t val = p_atomic_xchg(&fence->val, 0);
> +
> +   assert(val != 0);
> +
> +   if (val == 2)
> +      futex_wake_all(&fence->val);
> +}
> +
> +/**
> + * Move \p fence back into unsignalled state.
> + *
> + * \warning The caller must ensure that no other thread may currently be
> + *          waiting (or about to wait) on the fence.
> + */
> +static inline void
> +util_queue_fence_reset(struct util_queue_fence *fence)
> +{
> +#ifdef NDEBUG
> +   fence->val = 1;
> +#else
> +   uint32_t v = p_atomic_xchg(&fence->val, 1);
> +   assert(v == 0);
> +#endif
> +}
> +
> +static inline bool
> +util_queue_fence_is_signalled(struct util_queue_fence *fence)
> +{
> +   return fence->val == 0;
> +}
> +#endif
> +
> +#ifdef UTIL_QUEUE_FENCE_STANDARD
>  /* Job completion fence.
>   * Put this into your job structure.
>   */
>  struct util_queue_fence {
>     mtx_t mutex;
>     cnd_t cond;
>     int signalled;
>  };
>
>  void util_queue_fence_init(struct util_queue_fence *fence);
> @@ -70,20 +161,21 @@ util_queue_fence_reset(struct util_queue_fence *fence)
>  {
>     assert(fence->signalled);
>     fence->signalled = 0;
>  }
>
>  static inline bool
>  util_queue_fence_is_signalled(struct util_queue_fence *fence)
>  {
>     return fence->signalled != 0;
>  }
> +#endif
>
>  typedef void (*util_queue_execute_func)(void *job, int thread_index);
>
>  struct util_queue_job {
>     void *job;
>     struct util_queue_fence *fence;
>     util_queue_execute_func execute;
>     util_queue_execute_func cleanup;
>  };
>
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list