<div dir="ltr"><div>For the series:</div><div><br></div><div><div>Reviewed-by: Marek Olšák <<a href="mailto:marek.olsak@amd.com">marek.olsak@amd.com</a>></div><div><br></div><div>Marek</div></div></div><br><div class="gmail_quote"><div dir="ltr">On Thu, Dec 6, 2018 at 7:57 AM Nicolai Hähnle <<a href="mailto:nhaehnle@gmail.com">nhaehnle@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Nicolai Hähnle <<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>><br>
<br>
The following race condition could occur in the no-timeout case:<br>
<br>
  API thread               Gallium thread            Watchdog<br>
  ----------               --------------            --------<br>
  dd_before_draw<br>
  u_threaded_context draw<br>
  dd_after_draw<br>
    add to dctx->records<br>
    signal watchdog<br>
                                                     dump & destroy record<br>
                           execute draw<br>
                           dd_after_draw_async<br>
                             use-after-free!<br>
<br>
Alternatively, the same scenario would assert in a debug build when<br>
destroying the record because record->driver_finished has not signaled.<br>
<br>
Fix this and simplify the logic at the same time by<br>
- handing the record pointers off to the watchdog thread *before* each<br>
  draw call and<br>
- waiting on the driver_finished fence in the watchdog thread<br>
---<br>
 .../auxiliary/driver_ddebug/dd_context.c      |   1 -<br>
 src/gallium/auxiliary/driver_ddebug/dd_draw.c | 103 ++++++++----------<br>
 src/gallium/auxiliary/driver_ddebug/dd_pipe.h |  21 ++--<br>
 3 files changed, 52 insertions(+), 73 deletions(-)<br>
<br>
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_context.c b/src/gallium/auxiliary/driver_ddebug/dd_context.c<br>
index a9ac6ef14ab..15efeccf879 100644<br>
--- a/src/gallium/auxiliary/driver_ddebug/dd_context.c<br>
+++ b/src/gallium/auxiliary/driver_ddebug/dd_context.c<br>
@@ -589,21 +589,20 @@ static void<br>
 dd_context_destroy(struct pipe_context *_pipe)<br>
 {<br>
    struct dd_context *dctx = dd_context(_pipe);<br>
    struct pipe_context *pipe = dctx->pipe;<br>
<br>
    dd_thread_join(dctx);<br>
    mtx_destroy(&dctx->mutex);<br>
    cnd_destroy(&dctx->cond);<br>
<br>
    assert(list_empty(&dctx->records));<br>
-   assert(!dctx->record_pending);<br>
<br>
    if (pipe->set_log_context) {<br>
       pipe->set_log_context(pipe, NULL);<br>
<br>
       if (dd_screen(dctx->base.screen)->dump_mode == DD_DUMP_ALL_CALLS) {<br>
          FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);<br>
          if (f) {<br>
             fprintf(f, "Remainder of driver log:\n\n");<br>
          }<br>
<br>
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_draw.c b/src/gallium/auxiliary/driver_ddebug/dd_draw.c<br>
index cb5db8ab83b..a930299ebb7 100644<br>
--- a/src/gallium/auxiliary/driver_ddebug/dd_draw.c<br>
+++ b/src/gallium/auxiliary/driver_ddebug/dd_draw.c<br>
@@ -981,80 +981,75 @@ dd_report_hang(struct dd_context *dctx)<br>
          }<br>
<br>
          fclose(f);<br>
       }<br>
<br>
       if (top_not_reached)<br>
          stop_output = true;<br>
       encountered_hang = true;<br>
    }<br>
<br>
-   if (num_later || dctx->record_pending) {<br>
-      fprintf(stderr, "... and %u%s additional draws.\n", num_later,<br>
-              dctx->record_pending ? "+1 (pending)" : "");<br>
-   }<br>
+   if (num_later)<br>
+      fprintf(stderr, "... and %u additional draws.\n", num_later);<br>
<br>
    fprintf(stderr, "\nDone.\n");<br>
    dd_kill_process();<br>
 }<br>
<br>
 int<br>
 dd_thread_main(void *input)<br>
 {<br>
    struct dd_context *dctx = (struct dd_context *)input;<br>
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);<br>
    struct pipe_screen *screen = dscreen->screen;<br>
<br>
    mtx_lock(&dctx->mutex);<br>
<br>
    for (;;) {<br>
       struct list_head records;<br>
-      struct pipe_fence_handle *fence;<br>
-      struct pipe_fence_handle *fence2 = NULL;<br>
-<br>
       list_replace(&dctx->records, &records);<br>
       list_inithead(&dctx->records);<br>
       dctx->num_records = 0;<br>
<br>
       if (dctx->api_stalled)<br>
          cnd_signal(&dctx->cond);<br>
<br>
-      if (!list_empty(&records)) {<br>
-         /* Wait for the youngest draw. This means hangs can take a bit longer<br>
-          * to detect, but it's more efficient this way. */<br>
-         struct dd_draw_record *youngest =<br>
-            LIST_ENTRY(struct dd_draw_record, records.prev, list);<br>
-         fence = youngest->bottom_of_pipe;<br>
-      } else if (dctx->record_pending) {<br>
-         /* Wait for pending fences, in case the driver ends up hanging internally. */<br>
-         fence = dctx->record_pending->prev_bottom_of_pipe;<br>
-         fence2 = dctx->record_pending->top_of_pipe;<br>
-      } else if (dctx->kill_thread) {<br>
-         break;<br>
-      } else {<br>
+      if (list_empty(&records)) {<br>
+         if (dctx->kill_thread)<br>
+            break;<br>
+<br>
          cnd_wait(&dctx->cond, &dctx->mutex);<br>
          continue;<br>
       }<br>
+<br>
       mtx_unlock(&dctx->mutex);<br>
<br>
-      /* Fences can be NULL legitimately when timeout detection is disabled. */<br>
-      if ((fence &&<br>
-           !screen->fence_finish(screen, NULL, fence,<br>
-                                 (uint64_t)dscreen->timeout_ms * 1000*1000)) ||<br>
-          (fence2 &&<br>
-           !screen->fence_finish(screen, NULL, fence2,<br>
-                                 (uint64_t)dscreen->timeout_ms * 1000*1000))) {<br>
-         mtx_lock(&dctx->mutex);<br>
-         list_splice(&records, &dctx->records);<br>
-         dd_report_hang(dctx);<br>
-         /* we won't actually get here */<br>
-         mtx_unlock(&dctx->mutex);<br>
+      /* Wait for the youngest draw. This means hangs can take a bit longer<br>
+       * to detect, but it's more efficient this way.  */<br>
+      struct dd_draw_record *youngest =<br>
+         list_last_entry(&records, struct dd_draw_record, list);<br>
+<br>
+      if (dscreen->timeout_ms > 0) {<br>
+         uint64_t abs_timeout = os_time_get_absolute_timeout(<br>
+                                 (uint64_t)dscreen->timeout_ms * 1000*1000);<br>
+<br>
+         if (!util_queue_fence_wait_timeout(&youngest->driver_finished, abs_timeout) ||<br>
+             !screen->fence_finish(screen, NULL, youngest->bottom_of_pipe,<br>
+                                   (uint64_t)dscreen->timeout_ms * 1000*1000)) {<br>
+            mtx_lock(&dctx->mutex);<br>
+            list_splice(&records, &dctx->records);<br>
+            dd_report_hang(dctx);<br>
+            /* we won't actually get here */<br>
+            mtx_unlock(&dctx->mutex);<br>
+         }<br>
+      } else {<br>
+         util_queue_fence_wait(&youngest->driver_finished);<br>
       }<br>
<br>
       list_for_each_entry_safe(struct dd_draw_record, record, &records, list) {<br>
          dd_maybe_dump_record(dscreen, record);<br>
          list_del(&record->list);<br>
          dd_free_record(screen, record);<br>
       }<br>
<br>
       mtx_lock(&dctx->mutex);<br>
    }<br>
@@ -1072,20 +1067,21 @@ dd_create_record(struct dd_context *dctx)<br>
       return NULL;<br>
<br>
    record->dctx = dctx;<br>
    record->draw_call = dctx->num_draw_calls;<br>
<br>
    record->prev_bottom_of_pipe = NULL;<br>
    record->top_of_pipe = NULL;<br>
    record->bottom_of_pipe = NULL;<br>
    record->log_page = NULL;<br>
    util_queue_fence_init(&record->driver_finished);<br>
+   util_queue_fence_reset(&record->driver_finished);<br>
<br>
    dd_init_copy_of_draw_state(&record->draw_state);<br>
    dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);<br>
<br>
    return record;<br>
 }<br>
<br>
 static void<br>
 dd_context_flush(struct pipe_context *_pipe,<br>
                  struct pipe_fence_handle **fence, unsigned flags)<br>
@@ -1108,41 +1104,50 @@ dd_before_draw(struct dd_context *dctx, struct dd_draw_record *record)<br>
    if (dscreen->timeout_ms > 0) {<br>
       if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count) {<br>
          pipe->flush(pipe, &record->prev_bottom_of_pipe, 0);<br>
          screen->fence_reference(screen, &record->top_of_pipe, record->prev_bottom_of_pipe);<br>
       } else {<br>
          pipe->flush(pipe, &record->prev_bottom_of_pipe,<br>
                      PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE);<br>
          pipe->flush(pipe, &record->top_of_pipe,<br>
                      PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE);<br>
       }<br>
+   }<br>
<br>
-      mtx_lock(&dctx->mutex);<br>
-      dctx->record_pending = record;<br>
-      if (list_empty(&dctx->records))<br>
-         cnd_signal(&dctx->cond);<br>
-      mtx_unlock(&dctx->mutex);<br>
+   mtx_lock(&dctx->mutex);<br>
+   if (unlikely(dctx->num_records > 10000)) {<br>
+      dctx->api_stalled = true;<br>
+      /* Since this is only a heuristic to prevent the API thread from getting<br>
+       * too far ahead, we don't need a loop here. */<br>
+      cnd_wait(&dctx->cond, &dctx->mutex);<br>
+      dctx->api_stalled = false;<br>
    }<br>
+<br>
+   if (list_empty(&dctx->records))<br>
+      cnd_signal(&dctx->cond);<br>
+<br>
+   list_addtail(&record->list, &dctx->records);<br>
+   dctx->num_records++;<br>
+   mtx_unlock(&dctx->mutex);<br>
 }<br>
<br>
 static void<br>
 dd_after_draw_async(void *data)<br>
 {<br>
    struct dd_draw_record *record = (struct dd_draw_record *)data;<br>
    struct dd_context *dctx = record->dctx;<br>
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);<br>
<br>
    record->log_page = u_log_new_page(&dctx->log);<br>
    record->time_after = os_time_get_nano();<br>
<br>
-   if (!util_queue_fence_is_signalled(&record->driver_finished))<br>
-      util_queue_fence_signal(&record->driver_finished);<br>
+   util_queue_fence_signal(&record->driver_finished);<br>
<br>
    if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&<br>
        dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) {<br>
       dd_thread_join(dctx);<br>
       /* No need to continue. */<br>
       exit(0);<br>
    }<br>
 }<br>
<br>
 static void<br>
@@ -1151,48 +1156,28 @@ dd_after_draw(struct dd_context *dctx, struct dd_draw_record *record)<br>
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);<br>
    struct pipe_context *pipe = dctx->pipe;<br>
<br>
    if (dscreen->timeout_ms > 0) {<br>
       unsigned flush_flags;<br>
       if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count)<br>
          flush_flags = 0;<br>
       else<br>
          flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE;<br>
       pipe->flush(pipe, &record->bottom_of_pipe, flush_flags);<br>
-<br>
-      assert(record == dctx->record_pending);<br>
    }<br>
<br>
    if (pipe->callback) {<br>
-      util_queue_fence_reset(&record->driver_finished);<br>
       pipe->callback(pipe, dd_after_draw_async, record, true);<br>
    } else {<br>
       dd_after_draw_async(record);<br>
    }<br>
<br>
-   mtx_lock(&dctx->mutex);<br>
-   if (unlikely(dctx->num_records > 10000)) {<br>
-      dctx->api_stalled = true;<br>
-      /* Since this is only a heuristic to prevent the API thread from getting<br>
-       * too far ahead, we don't need a loop here. */<br>
-      cnd_wait(&dctx->cond, &dctx->mutex);<br>
-      dctx->api_stalled = false;<br>
-   }<br>
-<br>
-   if (list_empty(&dctx->records))<br>
-      cnd_signal(&dctx->cond);<br>
-<br>
-   list_addtail(&record->list, &dctx->records);<br>
-   dctx->record_pending = NULL;<br>
-   dctx->num_records++;<br>
-   mtx_unlock(&dctx->mutex);<br>
-<br>
    ++dctx->num_draw_calls;<br>
    if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)<br>
       fprintf(stderr, "Gallium debugger reached %u draw calls.\n",<br>
               dctx->num_draw_calls);<br>
 }<br>
<br>
 static void<br>
 dd_context_draw_vbo(struct pipe_context *_pipe,<br>
                     const struct pipe_draw_info *info)<br>
 {<br>
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h<br>
index 07c4d55017f..12da8280aa6 100644<br>
--- a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h<br>
+++ b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h<br>
@@ -267,20 +267,21 @@ struct dd_draw_state_copy<br>
 };<br>
<br>
 struct dd_draw_record {<br>
    struct list_head list;<br>
    struct dd_context *dctx;<br>
<br>
    int64_t time_before;<br>
    int64_t time_after;<br>
    unsigned draw_call;<br>
<br>
+   /* The fence pointers are guaranteed to be valid once driver_finished is signalled */<br>
    struct pipe_fence_handle *prev_bottom_of_pipe;<br>
    struct pipe_fence_handle *top_of_pipe;<br>
    struct pipe_fence_handle *bottom_of_pipe;<br>
<br>
    struct dd_call call;<br>
    struct dd_draw_state_copy draw_state;<br>
<br>
    struct util_queue_fence driver_finished;<br>
    struct u_log_page *log_page;<br>
 };<br>
@@ -290,38 +291,32 @@ struct dd_context<br>
    struct pipe_context base;<br>
    struct pipe_context *pipe;<br>
<br>
    struct dd_draw_state draw_state;<br>
    unsigned num_draw_calls;<br>
<br>
    struct u_log_context log;<br>
<br>
    /* Pipelined hang detection.<br>
     *<br>
-    * This is without unnecessary flushes and waits. There is a memory-based<br>
-    * fence that is incremented by clear_buffer every draw call. Driver fences<br>
-    * are not used.<br>
+    * Before each draw call, a new dd_draw_record is created that contains<br>
+    * a copy of all states. After each draw call, the driver's log is added<br>
+    * to this record. Additionally, deferred fences are associated with each<br>
+    * record both before and after the draw.<br>
     *<br>
-    * After each draw call, a new dd_draw_record is created that contains<br>
-    * a copy of all states, the output of pipe_context::dump_debug_state,<br>
-    * and it has a fence number assigned. That's done without knowing whether<br>
-    * that draw call is problematic or not. The record is added into the list<br>
-    * of all records.<br>
-    *<br>
-    * An independent, separate thread loops over the list of records and checks<br>
-    * their fences. Records with signalled fences are freed. On fence timeout,<br>
-    * the thread dumps the records of in-flight draws.<br>
+    * The records are handed off to a separate thread which waits on the<br>
+    * records' fences. Records with signalled fences are freed. When a timeout<br>
+    * is detected, the thread dumps the records of in-flight draws.<br>
     */<br>
    thrd_t thread;<br>
    mtx_t mutex;<br>
    cnd_t cond;<br>
-   struct dd_draw_record *record_pending; /* currently inside the driver */<br>
    struct list_head records; /* oldest record first */<br>
    unsigned num_records;<br>
    bool kill_thread;<br>
    bool api_stalled;<br>
 };<br>
<br>
<br>
 struct pipe_context *<br>
 dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);<br>
<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>