<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote"><div dir="ltr">On Thu, Jan 3, 2019 at 3:01 PM Ian Romanick <<a href="mailto:idr@freedesktop.org">idr@freedesktop.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">On 11/28/18 6:59 PM, Marek Olšák wrote:<br>
> From: Marek Olšák <<a href="mailto:marek.olsak@amd.com" target="_blank">marek.olsak@amd.com</a>><br>
> <br>
> for ARB_parallel_shader_compile<br>
> ---<br>
>  src/util/u_queue.c | 49 +++++++++++++++++++++++++++++-----------------<br>
>  src/util/u_queue.h |  5 ++---<br>
>  2 files changed, 33 insertions(+), 21 deletions(-)<br>
> <br>
> diff --git a/src/util/u_queue.c b/src/util/u_queue.c<br>
> index 48c5c79552d..5aaf60ae78e 100644<br>
> --- a/src/util/u_queue.c<br>
> +++ b/src/util/u_queue.c<br>
> @@ -26,42 +26,43 @@<br>
>  <br>
>  #include "u_queue.h"<br>
>  <br>
>  #include <time.h><br>
>  <br>
>  #include "util/os_time.h"<br>
>  #include "util/u_string.h"<br>
>  #include "util/u_thread.h"<br>
>  #include "u_process.h"<br>
>  <br>
> -static void util_queue_killall_and_wait(struct util_queue *queue);<br>
> +static void<br>
> +util_queue_kill_threads(struct util_queue *queue, unsigned keep_num_threads);<br>
>  <br>
>  /****************************************************************************<br>
>   * Wait for all queues to assert idle when exit() is called.<br>
>   *<br>
>   * Otherwise, C++ static variable destructors can be called while threads<br>
>   * are using the static variables.<br>
>   */<br>
>  <br>
>  static once_flag atexit_once_flag = ONCE_FLAG_INIT;<br>
>  static struct list_head queue_list;<br>
>  static mtx_t exit_mutex = _MTX_INITIALIZER_NP;<br>
>  <br>
>  static void<br>
>  atexit_handler(void)<br>
>  {<br>
>     struct util_queue *iter;<br>
>  <br>
>     mtx_lock(&exit_mutex);<br>
>     /* Wait for all queues to assert idle. */<br>
>     LIST_FOR_EACH_ENTRY(iter, &queue_list, head) {<br>
> -      util_queue_killall_and_wait(iter);<br>
> +      util_queue_kill_threads(iter, 0);<br>
>     }<br>
>     mtx_unlock(&exit_mutex);<br>
>  }<br>
>  <br>
>  static void<br>
>  global_init(void)<br>
>  {<br>
>     LIST_INITHEAD(&queue_list);<br>
>     atexit(atexit_handler);<br>
>  }<br>
> @@ -259,55 +260,58 @@ util_queue_thread_func(void *input)<br>
>        u_thread_setname(name);<br>
>     }<br>
>  <br>
>     while (1) {<br>
>        struct util_queue_job job;<br>
>  <br>
>        mtx_lock(&queue->lock);<br>
>        assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);<br>
>  <br>
>        /* wait if the queue is empty */<br>
> -      while (!queue->kill_threads && queue->num_queued == 0)<br>
> +      while (thread_index < queue->num_threads && queue->num_queued == 0)<br>
>           cnd_wait(&queue->has_queued_cond, &queue->lock);<br>
>  <br>
> -      if (queue->kill_threads) {<br>
> +      /* only kill threads that are above "num_threads" */<br>
> +      if (thread_index >= queue->num_threads) {<br>
>           mtx_unlock(&queue->lock);<br>
>           break;<br>
>        }<br>
>  <br>
>        job = queue->jobs[queue->read_idx];<br>
>        memset(&queue->jobs[queue->read_idx], 0, sizeof(struct util_queue_job));<br>
>        queue->read_idx = (queue->read_idx + 1) % queue->max_jobs;<br>
>  <br>
>        queue->num_queued--;<br>
>        cnd_signal(&queue->has_space_cond);<br>
>        mtx_unlock(&queue->lock);<br>
>  <br>
>        if (job.job) {<br>
>           job.execute(job.job, thread_index);<br>
>           util_queue_fence_signal(job.fence);<br>
>           if (job.cleanup)<br>
>              job.cleanup(job.job, thread_index);<br>
>        }<br>
>     }<br>
>  <br>
> -   /* signal remaining jobs before terminating */<br>
> +   /* signal remaining jobs if all threads are being terminated */<br>
>     mtx_lock(&queue->lock);<br>
> -   for (unsigned i = queue->read_idx; i != queue->write_idx;<br>
> -        i = (i + 1) % queue->max_jobs) {<br>
> -      if (queue->jobs[i].job) {<br>
> -         util_queue_fence_signal(queue->jobs[i].fence);<br>
> -         queue->jobs[i].job = NULL;<br>
> +   if (queue->num_threads == 0) {<br>
> +      for (unsigned i = queue->read_idx; i != queue->write_idx;<br>
> +           i = (i + 1) % queue->max_jobs) {<br>
> +         if (queue->jobs[i].job) {<br>
> +            util_queue_fence_signal(queue->jobs[i].fence);<br>
> +            queue->jobs[i].job = NULL;<br>
> +         }<br>
>        }<br>
> +      queue->read_idx = queue->write_idx;<br>
> +      queue->num_queued = 0;<br>
>     }<br>
> -   queue->read_idx = queue->write_idx;<br>
> -   queue->num_queued = 0;<br>
>     mtx_unlock(&queue->lock);<br>
>     return 0;<br>
>  }<br>
>  <br>
>  static bool<br>
>  util_queue_create_thread(struct util_queue *queue, unsigned index)<br>
>  {<br>
>     struct thread_input *input =<br>
>        (struct thread_input *) malloc(sizeof(struct thread_input));<br>
>     input->queue = queue;<br>
> @@ -418,60 +422,69 @@ fail:<br>
>        cnd_destroy(&queue->has_queued_cond);<br>
>        mtx_destroy(&queue->lock);<br>
>        free(queue->jobs);<br>
>     }<br>
>     /* also util_queue_is_initialized can be used to check for success */<br>
>     memset(queue, 0, sizeof(*queue));<br>
>     return false;<br>
>  }<br>
>  <br>
>  static void<br>
> -util_queue_killall_and_wait(struct util_queue *queue)<br>
> +util_queue_kill_threads(struct util_queue *queue, unsigned keep_num_threads)<br>
>  {<br>
>     unsigned i;<br>
>  <br>
>     /* Signal all threads to terminate. */<br>
> +   mtx_lock(&queue->finish_lock);<br>
> +<br>
> +   if (keep_num_threads >= queue->num_threads) {<br>
> +      mtx_unlock(&queue->finish_lock);<br>
> +      return;<br>
> +   }<br>
> +<br>
>     mtx_lock(&queue->lock);<br>
> -   queue->kill_threads = 1;<br>
> +   unsigned old_num_threads = queue->num_threads;<br>
> +   queue->num_threads = keep_num_threads;<br>
<br>
Shouldn't this still be set below, after the threads are joined?<br></blockquote><div><br></div><div>The trick is that setting num_threads is what terminates the threads. Then cnd_broadcast wakes up the threads and they will run to the end when they see that thread_index >= num_threads.</div><div><br></div><div>I've added this explanation as a code comment locally.<br></div><div><br></div><div>Marek<br></div><div><br></div><div> <br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
>     cnd_broadcast(&queue->has_queued_cond);<br>
>     mtx_unlock(&queue->lock);<br>
>  <br>
> -   for (i = 0; i < queue->num_threads; i++)<br>
> +   for (i = keep_num_threads; i < old_num_threads; i++)<br>
>        thrd_join(queue->threads[i], NULL);<br>
> -   queue->num_threads = 0;<br>
> +<br>
> +   mtx_unlock(&queue->finish_lock);<br>
>  }<br>
>  <br>
>  void<br>
>  util_queue_destroy(struct util_queue *queue)<br>
>  {<br>
> -   util_queue_killall_and_wait(queue);<br>
> +   util_queue_kill_threads(queue, 0);<br>
>     remove_from_atexit_list(queue);<br>
>  <br>
>     cnd_destroy(&queue->has_space_cond);<br>
>     cnd_destroy(&queue->has_queued_cond);<br>
>     mtx_destroy(&queue->finish_lock);<br>
>     mtx_destroy(&queue->lock);<br>
>     free(queue->jobs);<br>
>     free(queue->threads);<br>
>  }<br>
>  <br>
>  void<br>
>  util_queue_add_job(struct util_queue *queue,<br>
>                     void *job,<br>
>                     struct util_queue_fence *fence,<br>
>                     util_queue_execute_func execute,<br>
>                     util_queue_execute_func cleanup)<br>
>  {<br>
>     struct util_queue_job *ptr;<br>
>  <br>
>     mtx_lock(&queue->lock);<br>
> -   if (queue->kill_threads) {<br>
> +   if (queue->num_threads == 0) {<br>
>        mtx_unlock(&queue->lock);<br>
>        /* well no good option here, but any leaks will be<br>
>         * short-lived as things are shutting down..<br>
>         */<br>
>        return;<br>
>     }<br>
>  <br>
>     util_queue_fence_reset(fence);<br>
>  <br>
>     assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);<br>
> diff --git a/src/util/u_queue.h b/src/util/u_queue.h<br>
> index 4e63a76aab2..756fa53e1bf 100644<br>
> --- a/src/util/u_queue.h<br>
> +++ b/src/util/u_queue.h<br>
> @@ -194,29 +194,28 @@ typedef void (*util_queue_execute_func)(void *job, int thread_index);<br>
>  struct util_queue_job {<br>
>     void *job;<br>
>     struct util_queue_fence *fence;<br>
>     util_queue_execute_func execute;<br>
>     util_queue_execute_func cleanup;<br>
>  };<br>
>  <br>
>  /* Put this into your context. */<br>
>  struct util_queue {<br>
>     char name[14]; /* 13 characters = the thread name without the index */<br>
> -   mtx_t finish_lock; /* only for util_queue_finish */<br>
> +   mtx_t finish_lock; /* for util_queue_finish and protects threads/num_threads */<br>
>     mtx_t lock;<br>
>     cnd_t has_queued_cond;<br>
>     cnd_t has_space_cond;<br>
>     thrd_t *threads;<br>
>     unsigned flags;<br>
>     int num_queued;<br>
> -   unsigned num_threads;<br>
> -   int kill_threads;<br>
> +   unsigned num_threads; /* decreasing this number will terminate threads */<br>
>     int max_jobs;<br>
>     int write_idx, read_idx; /* ring buffer pointers */<br>
>     struct util_queue_job *jobs;<br>
>  <br>
>     /* for cleanup at exit(), protected by exit_mutex */<br>
>     struct list_head head;<br>
>  };<br>
>  <br>
>  bool util_queue_init(struct util_queue *queue,<br>
>                       const char *name,<br>
> <br>
<br>
</blockquote></div></div>