[Mesa-dev] [PATCH 20/25] ddebug: rewrite to always use a threaded approach

Nicolai Hähnle nhaehnle at gmail.com
Sun Oct 22 19:08:03 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

This patch has multiple goals:

1. Off-load the writing of records in 'always' mode to another thread
   for performance.

2. Allow using ddebug with threaded contexts. This really forces us to
   move some of the "after_draw" handling into another thread.

3. Simplify the different modes of ddebug, both in the code and in
   the user interface, i.e. GALLIUM_DDEBUG. In particular, there's
   no 'pipelined' mode anymore, since we're always pipelined; and the
   'noflush' option is replaced by 'flush', since we no longer flush
   by default.

4. Fix the fences in pipelined mode. They previously relied on writes
   via pipe_context::clear_buffer. However, on radeonsi, those could
   (quite reasonably) end up in the SDMA buffer. So we use the newly
   added PIPE_FLUSH_{TOP,BOTTOM}_OF_PIPE fences instead.

5. Improve pipelined mode overall, using the finer-grained information
   provided by the new fences.

Overall, the result is that pipelined mode should be more useful, and
using ddebug in default mode is much less invasive, in the sense that
it changes the overall driver behavior less (which is kind of crucial
for a driver debugging tool).

An example of the new hang debug output:

  Gallium debugger active.
  Hang detection timeout is 1000ms.
  GPU hang detected, collecting information...

  Draw #   driver  prev BOP  TOP  BOP  dump file
  -------------------------------------------------------------
  2          YES      YES    YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000000
  3          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000001
  4          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000002
  5          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000003

  Done.

We can see that there were almost certainly 4 draws in flight when
the hang happened: the top-of-pipe fence was signaled for all 4 draws,
the bottom-of-pipe fence for none of them. In virtually all cases,
we'd expect the first draw in the list to be at fault, but due to the
GPU parallelism, it's possible (though highly unlikely) that one of
the later draws causes a component to get stuck in a way that prevents
the earlier draws from making progress as well.

(In the above example, there were actually only 3 draws truly in flight:
the last draw is a blit that waits for the earlier draws; however, its
top-of-pipe fence is emitted before the cache flush and wait, and so
the fact that the draw hasn't truly started yet can only be seen from a
closer inspection of GPU state.)
---
 src/gallium/drivers/ddebug/dd_context.c |  65 +--
 src/gallium/drivers/ddebug/dd_draw.c    | 790 ++++++++++++++++----------------
 src/gallium/drivers/ddebug/dd_pipe.h    |  45 +-
 src/gallium/drivers/ddebug/dd_screen.c  | 161 ++++---
 4 files changed, 546 insertions(+), 515 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c
index 2abbff933f6..558708df58d 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -557,44 +557,47 @@ dd_context_set_stream_output_targets(struct pipe_context *_pipe,
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
    struct dd_draw_state *dstate = &dctx->draw_state;
 
    dstate->num_so_targets = num_targets;
    safe_memcpy(dstate->so_targets, tgs, sizeof(*tgs) * num_targets);
    safe_memcpy(dstate->so_offsets, offsets, sizeof(*offsets) * num_targets);
    pipe->set_stream_output_targets(pipe, num_targets, tgs, offsets);
 }
 
+void
+dd_thread_join(struct dd_context *dctx)
+{
+   mtx_lock(&dctx->mutex);
+   dctx->kill_thread = true;
+   cnd_signal(&dctx->cond);
+   mtx_unlock(&dctx->mutex);
+   thrd_join(dctx->thread, NULL);
+}
+
 static void
 dd_context_destroy(struct pipe_context *_pipe)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
 
-   if (dctx->thread) {
-      mtx_lock(&dctx->mutex);
-      dctx->kill_thread = 1;
-      mtx_unlock(&dctx->mutex);
-      thrd_join(dctx->thread, NULL);
-      mtx_destroy(&dctx->mutex);
-      assert(!dctx->records);
-   }
+   dd_thread_join(dctx);
+   mtx_destroy(&dctx->mutex);
+   cnd_destroy(&dctx->cond);
 
-   if (dctx->fence) {
-      pipe->transfer_unmap(pipe, dctx->fence_transfer);
-      pipe_resource_reference(&dctx->fence, NULL);
-   }
+   assert(list_empty(&dctx->records));
+   assert(!dctx->record_pending);
 
    if (pipe->set_log_context) {
       pipe->set_log_context(pipe, NULL);
 
-      if (dd_screen(dctx->base.screen)->mode == DD_DUMP_ALL_CALLS) {
+      if (dd_screen(dctx->base.screen)->dump_mode == DD_DUMP_ALL_CALLS) {
          FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);
          if (f) {
             fprintf(f, "Remainder of driver log:\n\n");
          }
 
          u_log_new_page_print(&dctx->log, f);
          fclose(f);
       }
    }
    u_log_context_destroy(&dctx->log);
@@ -914,46 +917,26 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
    CTX_INIT(make_image_handle_resident);
 
    dd_init_draw_functions(dctx);
 
    u_log_context_init(&dctx->log);
    if (pipe->set_log_context)
       pipe->set_log_context(pipe, &dctx->log);
 
    dctx->draw_state.sample_mask = ~0;
 
-   if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
-      dctx->fence = pipe_buffer_create(dscreen->screen, PIPE_BIND_CUSTOM,
-                                            PIPE_USAGE_STAGING, 4);
-      if (!dctx->fence)
-         goto fail;
-
-      dctx->mapped_fence = pipe_buffer_map(pipe, dctx->fence,
-                                           PIPE_TRANSFER_READ_WRITE |
-                                           PIPE_TRANSFER_PERSISTENT |
-                                           PIPE_TRANSFER_COHERENT,
-                                           &dctx->fence_transfer);
-      if (!dctx->mapped_fence)
-         goto fail;
-
-      *dctx->mapped_fence = 0;
-
-      (void) mtx_init(&dctx->mutex, mtx_plain);
-      dctx->thread = u_thread_create(dd_thread_pipelined_hang_detect, dctx);
-      if (!dctx->thread) {
-         mtx_destroy(&dctx->mutex);
-         goto fail;
-      }
+   list_inithead(&dctx->records);
+   (void) mtx_init(&dctx->mutex, mtx_plain);
+   (void) cnd_init(&dctx->cond);
+   dctx->thread = u_thread_create(dd_thread_main, dctx);
+   if (!dctx->thread) {
+      mtx_destroy(&dctx->mutex);
+      goto fail;
    }
 
    return &dctx->base;
 
 fail:
-   if (dctx) {
-      if (dctx->mapped_fence)
-         pipe_transfer_unmap(pipe, dctx->fence_transfer);
-      pipe_resource_reference(&dctx->fence, NULL);
-      FREE(dctx);
-   }
+   FREE(dctx);
    pipe->destroy(pipe);
    return NULL;
 }
diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c
index a15801beb1d..99c9c929b2e 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -32,64 +32,63 @@
 #include "util/u_framebuffer.h"
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
 #include "util/os_time.h"
 #include <inttypes.h>
 
 
-FILE *
-dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
+static void
+dd_write_header(FILE *f, struct pipe_screen *screen, unsigned apitrace_call_number)
 {
-   struct pipe_screen *screen = dscreen->screen;
    char cmd_line[4096];
-
-   FILE *f = dd_get_debug_file(dscreen->verbose);
-   if (!f)
-      return NULL;
-
    if (os_get_command_line(cmd_line, sizeof(cmd_line)))
       fprintf(f, "Command: %s\n", cmd_line);
    fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
    fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
    fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
 
    if (apitrace_call_number)
-      fprintf(f, "Last apitrace call: %u\n\n",
-              apitrace_call_number);
+      fprintf(f, "Last apitrace call: %u\n\n", apitrace_call_number);
+}
+
+FILE *
+dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
+{
+   struct pipe_screen *screen = dscreen->screen;
+
+   FILE *f = dd_get_debug_file(dscreen->verbose);
+   if (!f)
+      return NULL;
+
+   dd_write_header(f, screen, apitrace_call_number);
    return f;
 }
 
 static void
 dd_dump_dmesg(FILE *f)
 {
    char line[2000];
    FILE *p = popen("dmesg | tail -n60", "r");
 
    if (!p)
       return;
 
    fprintf(f, "\nLast 60 lines of dmesg:\n\n");
    while (fgets(line, sizeof(line), p))
       fputs(line, f);
 
    pclose(p);
 }
 
-static void
-dd_close_file_stream(FILE *f)
-{
-   fclose(f);
-}
-
 static unsigned
 dd_num_active_viewports(struct dd_draw_state *dstate)
 {
    struct tgsi_shader_info info;
    const struct tgsi_token *tokens;
 
    if (dstate->shaders[PIPE_SHADER_GEOMETRY])
       tokens = dstate->shaders[PIPE_SHADER_GEOMETRY]->state.shader.tokens;
    else if (dstate->shaders[PIPE_SHADER_TESS_EVAL])
       tokens = dstate->shaders[PIPE_SHADER_TESS_EVAL]->state.shader.tokens;
@@ -543,100 +542,29 @@ dd_dump_call(FILE *f, struct dd_draw_state *state, struct dd_call *call)
    case CALL_GENERATE_MIPMAP:
       dd_dump_generate_mipmap(state, f);
       break;
    case CALL_GET_QUERY_RESULT_RESOURCE:
       dd_dump_get_query_result_resource(&call->info.get_query_result_resource, f);
       break;
    }
 }
 
 static void
-dd_write_report(struct dd_context *dctx, struct dd_call *call, unsigned flags,
-                bool dump_dmesg)
-{
-   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
-                                dctx->draw_state.apitrace_call_number);
-
-   if (!f)
-      return;
-
-   dd_dump_call(f, &dctx->draw_state, call);
-   dd_dump_driver_state(dctx, f, flags);
-
-   fprintf(f,"\n\n**************************************************"
-             "***************************\n");
-   fprintf(f, "Context Log:\n\n");
-   u_log_new_page_print(&dctx->log, f);
-
-   if (dump_dmesg)
-      dd_dump_dmesg(f);
-   dd_close_file_stream(f);
-}
-
-static void
 dd_kill_process(void)
 {
    sync();
    fprintf(stderr, "dd: Aborting the process...\n");
    fflush(stdout);
    fflush(stderr);
    exit(1);
 }
 
-static bool
-dd_flush_and_check_hang(struct dd_context *dctx,
-                        struct pipe_fence_handle **flush_fence,
-                        unsigned flush_flags)
-{
-   struct pipe_fence_handle *fence = NULL;
-   struct pipe_context *pipe = dctx->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   uint64_t timeout_ms = dd_screen(dctx->base.screen)->timeout_ms;
-   bool idle;
-
-   assert(timeout_ms > 0);
-
-   pipe->flush(pipe, &fence, flush_flags);
-   if (flush_fence)
-      screen->fence_reference(screen, flush_fence, fence);
-   if (!fence)
-      return false;
-
-   idle = screen->fence_finish(screen, pipe, fence, timeout_ms * 1000000);
-   screen->fence_reference(screen, &fence, NULL);
-   if (!idle)
-      fprintf(stderr, "dd: GPU hang detected!\n");
-   return !idle;
-}
-
-static void
-dd_flush_and_handle_hang(struct dd_context *dctx,
-                         struct pipe_fence_handle **fence, unsigned flags,
-                         const char *cause)
-{
-   if (dd_flush_and_check_hang(dctx, fence, flags)) {
-      FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
-                                   dctx->draw_state.apitrace_call_number);
-
-      if (f) {
-         fprintf(f, "dd: %s.\n", cause);
-         dd_dump_driver_state(dctx, f,
-                              PIPE_DUMP_DEVICE_STATUS_REGISTERS);
-         dd_dump_dmesg(f);
-         dd_close_file_stream(f);
-      }
-
-      /* Terminate the process to prevent future hangs. */
-      dd_kill_process();
-   }
-}
-
 static void
 dd_unreference_copy_of_call(struct dd_call *dst)
 {
    switch (dst->type) {
    case CALL_DRAW_VBO:
       pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output, NULL);
       pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer, NULL);
       pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count, NULL);
       if (dst->info.draw_vbo.draw.index_size &&
           !dst->info.draw_vbo.draw.has_user_indices)
@@ -672,103 +600,20 @@ dd_unreference_copy_of_call(struct dd_call *dst)
    case CALL_GENERATE_MIPMAP:
       pipe_resource_reference(&dst->info.generate_mipmap.res, NULL);
       break;
    case CALL_GET_QUERY_RESULT_RESOURCE:
       pipe_resource_reference(&dst->info.get_query_result_resource.resource, NULL);
       break;
    }
 }
 
 static void
-dd_copy_call(struct dd_call *dst, struct dd_call *src)
-{
-   dst->type = src->type;
-
-   switch (src->type) {
-   case CALL_DRAW_VBO:
-      pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output,
-                               src->info.draw_vbo.draw.count_from_stream_output);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer,
-                              src->info.draw_vbo.indirect.buffer);
-      pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count,
-                              src->info.draw_vbo.indirect.indirect_draw_count);
-
-      if (dst->info.draw_vbo.draw.index_size &&
-          !dst->info.draw_vbo.draw.has_user_indices)
-         pipe_resource_reference(&dst->info.draw_vbo.draw.index.resource, NULL);
-      else
-         dst->info.draw_vbo.draw.index.user = NULL;
-
-      if (src->info.draw_vbo.draw.index_size &&
-          !src->info.draw_vbo.draw.has_user_indices) {
-         pipe_resource_reference(&dst->info.draw_vbo.draw.index.resource,
-                                 src->info.draw_vbo.draw.index.resource);
-      }
-
-      dst->info.draw_vbo = src->info.draw_vbo;
-      if (!src->info.draw_vbo.draw.indirect)
-         dst->info.draw_vbo.draw.indirect = NULL;
-      else
-         dst->info.draw_vbo.draw.indirect = &dst->info.draw_vbo.indirect;
-      break;
-   case CALL_LAUNCH_GRID:
-      pipe_resource_reference(&dst->info.launch_grid.indirect,
-                              src->info.launch_grid.indirect);
-      dst->info.launch_grid = src->info.launch_grid;
-      break;
-   case CALL_RESOURCE_COPY_REGION:
-      pipe_resource_reference(&dst->info.resource_copy_region.dst,
-                              src->info.resource_copy_region.dst);
-      pipe_resource_reference(&dst->info.resource_copy_region.src,
-                              src->info.resource_copy_region.src);
-      dst->info.resource_copy_region = src->info.resource_copy_region;
-      break;
-   case CALL_BLIT:
-      pipe_resource_reference(&dst->info.blit.dst.resource,
-                              src->info.blit.dst.resource);
-      pipe_resource_reference(&dst->info.blit.src.resource,
-                              src->info.blit.src.resource);
-      dst->info.blit = src->info.blit;
-      break;
-   case CALL_FLUSH_RESOURCE:
-      pipe_resource_reference(&dst->info.flush_resource,
-                              src->info.flush_resource);
-      break;
-   case CALL_CLEAR:
-      dst->info.clear = src->info.clear;
-      break;
-   case CALL_CLEAR_BUFFER:
-      pipe_resource_reference(&dst->info.clear_buffer.res,
-                              src->info.clear_buffer.res);
-      dst->info.clear_buffer = src->info.clear_buffer;
-      break;
-   case CALL_CLEAR_TEXTURE:
-      break;
-   case CALL_CLEAR_RENDER_TARGET:
-      break;
-   case CALL_CLEAR_DEPTH_STENCIL:
-      break;
-   case CALL_GENERATE_MIPMAP:
-      pipe_resource_reference(&dst->info.generate_mipmap.res,
-                              src->info.generate_mipmap.res);
-      dst->info.generate_mipmap = src->info.generate_mipmap;
-      break;
-   case CALL_GET_QUERY_RESULT_RESOURCE:
-      pipe_resource_reference(&dst->info.get_query_result_resource.resource,
-                              src->info.get_query_result_resource.resource);
-      dst->info.get_query_result_resource = src->info.get_query_result_resource;
-      dst->info.get_query_result_resource.query = NULL;
-      break;
-   }
-}
-
-static void
 dd_init_copy_of_draw_state(struct dd_draw_state_copy *state)
 {
    unsigned i,j;
 
    /* Just clear pointers to gallium objects. Don't clear the whole structure,
     * because it would kill performance with its size of 130 KB.
     */
    memset(state->base.vertex_buffers, 0,
           sizeof(state->base.vertex_buffers));
    memset(state->base.so_targets, 0,
@@ -928,494 +773,627 @@ dd_copy_draw_state(struct dd_draw_state *dst, struct dd_draw_state *src)
    dst->clip_state = src->clip_state;
    util_copy_framebuffer_state(&dst->framebuffer_state, &src->framebuffer_state);
    memcpy(dst->scissors, src->scissors, sizeof(src->scissors));
    memcpy(dst->viewports, src->viewports, sizeof(src->viewports));
    memcpy(dst->tess_default_levels, src->tess_default_levels,
           sizeof(src->tess_default_levels));
    dst->apitrace_call_number = src->apitrace_call_number;
 }
 
 static void
-dd_free_record(struct dd_draw_record **record)
+dd_free_record(struct pipe_screen *screen, struct dd_draw_record *record)
+{
+   u_log_page_destroy(record->log_page);
+   dd_unreference_copy_of_call(&record->call);
+   dd_unreference_copy_of_draw_state(&record->draw_state);
+   screen->fence_reference(screen, &record->prev_bottom_of_pipe, NULL);
+   screen->fence_reference(screen, &record->top_of_pipe, NULL);
+   screen->fence_reference(screen, &record->bottom_of_pipe, NULL);
+   util_queue_fence_destroy(&record->driver_finished);
+   FREE(record);
+}
+
+static void
+dd_write_record(FILE *f, struct dd_draw_record *record)
 {
-   struct dd_draw_record *next = (*record)->next;
+   dd_dump_call(f, &record->draw_state.base, &record->call);
 
-   u_log_page_destroy((*record)->log_page);
-   dd_unreference_copy_of_call(&(*record)->call);
-   dd_unreference_copy_of_draw_state(&(*record)->draw_state);
-   FREE(*record);
-   *record = next;
+   if (record->log_page) {
+      fprintf(f,"\n\n**************************************************"
+                "***************************\n");
+      fprintf(f, "Context Log:\n\n");
+      u_log_page_print(record->log_page, f);
+   }
 }
 
 static void
-dd_dump_record(struct dd_context *dctx, struct dd_draw_record *record,
-               uint32_t hw_sequence_no, int64_t now)
+dd_maybe_dump_record(struct dd_screen *dscreen, struct dd_draw_record *record)
 {
-   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
-                                record->draw_state.base.apitrace_call_number);
-   if (!f)
+   if (dscreen->dump_mode == DD_DUMP_ONLY_HANGS ||
+       (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&
+        dscreen->apitrace_dump_call != record->draw_state.base.apitrace_call_number))
       return;
 
-   fprintf(f, "Draw call sequence # = %u\n", record->sequence_no);
-   fprintf(f, "HW reached sequence # = %u\n", hw_sequence_no);
-   fprintf(f, "Elapsed time = %"PRIi64" ms\n\n",
-           (now - record->timestamp) / 1000);
-
-   dd_dump_call(f, &record->draw_state.base, &record->call);
+   char name[512];
+   dd_get_debug_filename_and_mkdir(name, sizeof(name), dscreen->verbose);
+   FILE *f = fopen(name, "w");
+   if (!f) {
+      fprintf(stderr, "dd: failed to open %s\n", name);
+      return;
+   }
 
-   fprintf(f,"\n\n**************************************************"
-             "***************************\n");
-   fprintf(f, "Context Log:\n\n");
-   u_log_page_print(record->log_page, f);
+   dd_write_header(f, dscreen->screen, record->draw_state.base.apitrace_call_number);
+   dd_write_record(f, record);
 
-   dctx->pipe->dump_debug_state(dctx->pipe, f,
-                                PIPE_DUMP_DEVICE_STATUS_REGISTERS);
-   dd_dump_dmesg(f);
    fclose(f);
 }
 
-int
-dd_thread_pipelined_hang_detect(void *input)
+static const char *
+dd_fence_state(struct pipe_screen *screen, struct pipe_fence_handle *fence,
+               bool *not_reached)
+{
+   if (!fence)
+      return "---";
+
+   bool ok = screen->fence_finish(screen, NULL, fence, 0);
+
+   if (not_reached && !ok)
+      *not_reached = true;
+
+   return ok ? "YES" : "NO ";
+}
+
+static void
+dd_report_hang(struct dd_context *dctx)
 {
-   struct dd_context *dctx = (struct dd_context *)input;
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+   struct pipe_screen *screen = dscreen->screen;
+   bool encountered_hang = false;
+   bool stop_output = false;
+   unsigned num_later = 0;
 
-   mtx_lock(&dctx->mutex);
+   fprintf(stderr, "GPU hang detected, collecting information...\n\n");
 
-   while (!dctx->kill_thread) {
-      struct dd_draw_record **record = &dctx->records;
+   fprintf(stderr, "Draw #   driver  prev BOP  TOP  BOP  dump file\n"
+                   "-------------------------------------------------------------\n");
 
-      /* Loop over all records. */
-      while (*record) {
-         int64_t now;
+   list_for_each_entry(struct dd_draw_record, record, &dctx->records, list) {
+      if (!encountered_hang &&
+          screen->fence_finish(screen, NULL, record->bottom_of_pipe, 0)) {
+         dd_maybe_dump_record(dscreen, record);
+         continue;
+      }
 
-         /* If the fence has been signalled, release the record and all older
-          * records.
-          */
-         if (*dctx->mapped_fence >= (*record)->sequence_no) {
-            while (*record)
-               dd_free_record(record);
-            break;
-         }
+      if (stop_output) {
+         dd_maybe_dump_record(dscreen, record);
+         num_later++;
+         continue;
+      }
 
-         /* The fence hasn't been signalled. Check the timeout. */
-         now = os_time_get();
-         if (os_time_timeout((*record)->timestamp,
-                             (*record)->timestamp + dscreen->timeout_ms * 1000,
-                             now)) {
-            fprintf(stderr, "GPU hang detected.\n");
+      bool driver = util_queue_fence_is_signalled(&record->driver_finished);
+      bool top_not_reached = false;
+      const char *prev_bop = dd_fence_state(screen, record->prev_bottom_of_pipe, NULL);
+      const char *top = dd_fence_state(screen, record->top_of_pipe, &top_not_reached);
+      const char *bop = dd_fence_state(screen, record->bottom_of_pipe, NULL);
 
-            /* Get the oldest unsignalled draw call. */
-            while ((*record)->next &&
-                   *dctx->mapped_fence < (*record)->next->sequence_no)
-               record = &(*record)->next;
+      fprintf(stderr, "%-9u %s      %s     %s  %s  ",
+              record->draw_call, driver ? "YES" : "NO ", prev_bop, top, bop);
 
-            dd_dump_record(dctx, *record, *dctx->mapped_fence, now);
-            dd_kill_process();
+      char name[512];
+      dd_get_debug_filename_and_mkdir(name, sizeof(name), false);
+
+      FILE *f = fopen(name, "w");
+      if (!f) {
+         fprintf(stderr, "fopen failed\n");
+      } else {
+         fprintf(stderr, "%s\n", name);
+
+         dd_write_header(f, dscreen->screen, record->draw_state.base.apitrace_call_number);
+         dd_write_record(f, record);
+
+         if (!encountered_hang) {
+            dd_dump_driver_state(dctx, f, PIPE_DUMP_DEVICE_STATUS_REGISTERS);
+            dd_dump_dmesg(f);
          }
 
-         record = &(*record)->next;
+         fclose(f);
       }
 
-      /* Unlock and sleep before starting all over again. */
-      mtx_unlock(&dctx->mutex);
-      os_time_sleep(10000); /* 10 ms */
-      mtx_lock(&dctx->mutex);
+      if (top_not_reached)
+         stop_output = true;
+      encountered_hang = true;
    }
 
-   /* Thread termination. */
-   while (dctx->records)
-      dd_free_record(&dctx->records);
+   if (num_later || dctx->record_pending) {
+      fprintf(stderr, "... and %u%s additional draws.\n", num_later,
+              dctx->record_pending ? "+1 (pending)" : "");
+   }
+
+   fprintf(stderr, "\nDone.\n");
+   dd_kill_process();
+}
 
+int
+dd_thread_main(void *input)
+{
+   struct dd_context *dctx = (struct dd_context *)input;
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+   struct pipe_screen *screen = dscreen->screen;
+
+   mtx_lock(&dctx->mutex);
+
+   for (;;) {
+      struct list_head records;
+      struct pipe_fence_handle *fence;
+      struct pipe_fence_handle *fence2 = NULL;
+
+      list_replace(&dctx->records, &records);
+      list_inithead(&dctx->records);
+      dctx->num_records = 0;
+
+      if (dctx->api_stalled)
+         cnd_signal(&dctx->cond);
+
+      if (!list_empty(&records)) {
+         /* Wait for the youngest draw. This means hangs can take a bit longer
+          * to detect, but it's more efficient this way. */
+         struct dd_draw_record *youngest =
+            LIST_ENTRY(struct dd_draw_record, records.prev, list);
+         fence = youngest->bottom_of_pipe;
+      } else if (dctx->record_pending) {
+         /* Wait for pending fences, in case the driver ends up hanging internally. */
+         fence = dctx->record_pending->prev_bottom_of_pipe;
+         fence2 = dctx->record_pending->top_of_pipe;
+      } else if (dctx->kill_thread) {
+         break;
+      } else {
+         cnd_wait(&dctx->cond, &dctx->mutex);
+         continue;
+      }
+      mtx_unlock(&dctx->mutex);
+
+      /* Fences can be NULL legitimately when timeout detection is disabled. */
+      if ((fence &&
+           !screen->fence_finish(screen, NULL, fence,
+                                 dscreen->timeout_ms * 1000*1000)) ||
+          (fence2 &&
+           !screen->fence_finish(screen, NULL, fence2,
+                                 dscreen->timeout_ms * 1000*1000))) {
+         mtx_lock(&dctx->mutex);
+         list_splice(&records, &dctx->records);
+         dd_report_hang(dctx);
+         /* we won't actually get here */
+         mtx_unlock(&dctx->mutex);
+      }
+
+      list_for_each_entry_safe(struct dd_draw_record, record, &records, list) {
+         dd_maybe_dump_record(dscreen, record);
+         list_del(&record->list);
+         dd_free_record(screen, record);
+      }
+
+      mtx_lock(&dctx->mutex);
+   }
    mtx_unlock(&dctx->mutex);
    return 0;
 }
 
-static void
-dd_pipelined_process_draw(struct dd_context *dctx, struct dd_call *call)
+static struct dd_draw_record *
+dd_create_record(struct dd_context *dctx)
 {
-   struct pipe_context *pipe = dctx->pipe;
    struct dd_draw_record *record;
 
-   /* Make a record of the draw call. */
    record = MALLOC_STRUCT(dd_draw_record);
    if (!record)
-      return;
+      return NULL;
 
-   /* Update the fence with the GPU.
-    *
-    * radeonsi/clear_buffer waits in the command processor until shaders are
-    * idle before writing to memory. That's a necessary condition for isolating
-    * draw calls.
-    */
-   dctx->sequence_no++;
-   pipe->clear_buffer(pipe, dctx->fence, 0, 4, &dctx->sequence_no, 4);
+   record->dctx = dctx;
+   record->draw_call = dctx->num_draw_calls;
 
-   /* Initialize the record. */
-   record->timestamp = os_time_get();
-   record->sequence_no = dctx->sequence_no;
-   record->log_page = u_log_new_page(&dctx->log);
-
-   memset(&record->call, 0, sizeof(record->call));
-   dd_copy_call(&record->call, call);
+   record->prev_bottom_of_pipe = NULL;
+   record->top_of_pipe = NULL;
+   record->bottom_of_pipe = NULL;
+   record->log_page = NULL;
+   util_queue_fence_init(&record->driver_finished);
 
    dd_init_copy_of_draw_state(&record->draw_state);
    dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);
 
-   /* Add the record to the list. */
-   mtx_lock(&dctx->mutex);
-   record->next = dctx->records;
-   dctx->records = record;
-   mtx_unlock(&dctx->mutex);
+   return record;
 }
 
 static void
 dd_context_flush(struct pipe_context *_pipe,
                  struct pipe_fence_handle **fence, unsigned flags)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
 
-   switch (dd_screen(dctx->base.screen)->mode) {
-   case DD_DETECT_HANGS:
-      dd_flush_and_handle_hang(dctx, fence, flags,
-                               "GPU hang detected in pipe->flush()");
-      break;
-   case DD_DETECT_HANGS_PIPELINED: /* nothing to do here */
-   case DD_DUMP_ALL_CALLS:
-   case DD_DUMP_APITRACE_CALL:
-      pipe->flush(pipe, fence, flags);
-      break;
-   default:
-      assert(0);
+   pipe->flush(pipe, fence, flags);
+}
+
+static void
+dd_before_draw(struct dd_context *dctx, struct dd_draw_record *record)
+{
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+   struct pipe_context *pipe = dctx->pipe;
+   struct pipe_screen *screen = dscreen->screen;
+
+   if (dscreen->timeout_ms > 0) {
+      if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count) {
+         pipe->flush(pipe, &record->prev_bottom_of_pipe, 0);
+         screen->fence_reference(screen, &record->top_of_pipe, record->prev_bottom_of_pipe);
+      } else {
+         pipe->flush(pipe, &record->prev_bottom_of_pipe,
+                     PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE);
+         pipe->flush(pipe, &record->top_of_pipe,
+                     PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE);
+      }
+
+      mtx_lock(&dctx->mutex);
+      dctx->record_pending = record;
+      if (list_empty(&dctx->records))
+         cnd_signal(&dctx->cond);
+      mtx_unlock(&dctx->mutex);
    }
 }
 
 static void
-dd_before_draw(struct dd_context *dctx)
+dd_after_draw_async(void *data)
 {
+   struct dd_draw_record *record = (struct dd_draw_record *)data;
+   struct dd_context *dctx = record->dctx;
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);
 
-   if (dscreen->mode == DD_DETECT_HANGS &&
-       !dscreen->no_flush &&
-       dctx->num_draw_calls >= dscreen->skip_count)
-      dd_flush_and_handle_hang(dctx, NULL, 0,
-                               "GPU hang most likely caused by internal "
-                               "driver commands");
+   record->log_page = u_log_new_page(&dctx->log);
+
+   if (!util_queue_fence_is_signalled(&record->driver_finished))
+      util_queue_fence_signal(&record->driver_finished);
+
+   if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&
+       dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) {
+      dd_thread_join(dctx);
+      /* No need to continue. */
+      exit(0);
+   }
 }
 
 static void
-dd_after_draw(struct dd_context *dctx, struct dd_call *call)
+dd_after_draw(struct dd_context *dctx, struct dd_draw_record *record) /* runs after the driver call: fence it, then queue the record */
 {
    struct dd_screen *dscreen = dd_screen(dctx->base.screen);
    struct pipe_context *pipe = dctx->pipe;
 
-   if (dctx->num_draw_calls >= dscreen->skip_count) {
-      switch (dscreen->mode) {
-      case DD_DETECT_HANGS:
-         if (!dscreen->no_flush &&
-            dd_flush_and_check_hang(dctx, NULL, 0)) {
-            dd_write_report(dctx, call,
-                         PIPE_DUMP_DEVICE_STATUS_REGISTERS,
-                         true);
-
-            /* Terminate the process to prevent future hangs. */
-            dd_kill_process();
-         } else {
-            u_log_page_destroy(u_log_new_page(&dctx->log));
-         }
-         break;
-      case DD_DETECT_HANGS_PIPELINED:
-         dd_pipelined_process_draw(dctx, call);
-         break;
-      case DD_DUMP_ALL_CALLS:
-         if (!dscreen->no_flush)
-            pipe->flush(pipe, NULL, 0);
-         dd_write_report(dctx, call, 0, false);
-         break;
-      case DD_DUMP_APITRACE_CALL:
-         if (dscreen->apitrace_dump_call ==
-             dctx->draw_state.apitrace_call_number) {
-            dd_write_report(dctx, call, 0, false);
-            /* No need to continue. */
-            exit(0);
-         } else {
-            u_log_page_destroy(u_log_new_page(&dctx->log));
-         }
-         break;
-      default:
-         assert(0);
-      }
+   if (dscreen->timeout_ms > 0) { /* a timeout of 0 means hang detection is disabled */
+      unsigned flush_flags;
+      if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count)
+         flush_flags = 0; /* 'flush' option and past the skip count: plain flush */
+      else
+         flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE; /* otherwise only ask for a deferred bottom-of-pipe fence */
+      pipe->flush(pipe, &record->bottom_of_pipe, flush_flags);
+
+      assert(record == dctx->record_pending);
    }
 
+   if (pipe->callback) { /* presumably a threaded driver context: let it run the async step */
+      util_queue_fence_reset(&record->driver_finished); /* re-armed here; dd_after_draw_async signals it */
+      pipe->callback(pipe, dd_after_draw_async, record, true);
+   } else {
+      dd_after_draw_async(record); /* no callback hook: run the step inline */
+   }
+
+   mtx_lock(&dctx->mutex);
+   if (unlikely(dctx->num_records > 10000)) {
+      dctx->api_stalled = true;
+      /* Since this is only a heuristic to prevent the API thread from getting
+       * too far ahead, we don't need a loop here. */
+      cnd_wait(&dctx->cond, &dctx->mutex);
+      dctx->api_stalled = false;
+   }
+
+   if (list_empty(&dctx->records)) /* list was empty before this append */
+      cnd_signal(&dctx->cond); /* presumably wakes the ddebug thread waiting for work */
+
+   list_addtail(&record->list, &dctx->records); /* appended at the tail, so the oldest record stays first */
+   dctx->record_pending = NULL; /* record is no longer "inside the driver" */
+   dctx->num_records++;
+   mtx_unlock(&dctx->mutex);
+
    ++dctx->num_draw_calls;
    if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
       fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
               dctx->num_draw_calls);
 }
 
 static void
 dd_context_draw_vbo(struct pipe_context *_pipe,
                     const struct pipe_draw_info *info)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
+
+   record->call.type = CALL_DRAW_VBO;
+   record->call.info.draw_vbo.draw = *info; /* shallow copy; object pointers are re-installed below */
+   record->call.info.draw_vbo.draw.count_from_stream_output = NULL;
+   pipe_so_target_reference(&record->call.info.draw_vbo.draw.count_from_stream_output,
+                            info->count_from_stream_output); /* presumably gives the record its own reference */
+   if (info->index_size && !info->has_user_indices) { /* only when indices come from a resource */
+      record->call.info.draw_vbo.draw.index.resource = NULL;
+      pipe_resource_reference(&record->call.info.draw_vbo.draw.index.resource,
+                              info->index.resource);
+   }
 
-   call.type = CALL_DRAW_VBO;
-   call.info.draw_vbo.draw = *info;
    if (info->indirect) {
-      call.info.draw_vbo.indirect = *info->indirect;
-      call.info.draw_vbo.draw.indirect = &call.info.draw_vbo.indirect;
+      record->call.info.draw_vbo.indirect = *info->indirect;
+      record->call.info.draw_vbo.draw.indirect = &record->call.info.draw_vbo.indirect; /* point at the record's copy, not the caller's struct */
+
+      record->call.info.draw_vbo.indirect.buffer = NULL;
+      pipe_resource_reference(&record->call.info.draw_vbo.indirect.buffer,
+                              info->indirect->buffer);
+      record->call.info.draw_vbo.indirect.indirect_draw_count = NULL;
+      pipe_resource_reference(&record->call.info.draw_vbo.indirect.indirect_draw_count,
+                              info->indirect->indirect_draw_count);
    } else {
-      memset(&call.info.draw_vbo.indirect, 0, sizeof(*info->indirect));
+      memset(&record->call.info.draw_vbo.indirect, 0, sizeof(*info->indirect)); /* sizeof does not evaluate the NULL pointer */
    }
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->draw_vbo(pipe, info);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_launch_grid(struct pipe_context *_pipe,
                        const struct pipe_grid_info *info)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_LAUNCH_GRID;
-   call.info.launch_grid = *info;
+   record->call.type = CALL_LAUNCH_GRID;
+   record->call.info.launch_grid = *info; /* shallow copy; the indirect pointer is re-installed below */
+   record->call.info.launch_grid.indirect = NULL;
+   pipe_resource_reference(&record->call.info.launch_grid.indirect, info->indirect); /* presumably gives the record its own reference */
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->launch_grid(pipe, info);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_resource_copy_region(struct pipe_context *_pipe,
                                 struct pipe_resource *dst, unsigned dst_level,
                                 unsigned dstx, unsigned dsty, unsigned dstz,
                                 struct pipe_resource *src, unsigned src_level,
                                 const struct pipe_box *src_box)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
-
-   call.type = CALL_RESOURCE_COPY_REGION;
-   call.info.resource_copy_region.dst = dst;
-   call.info.resource_copy_region.dst_level = dst_level;
-   call.info.resource_copy_region.dstx = dstx;
-   call.info.resource_copy_region.dsty = dsty;
-   call.info.resource_copy_region.dstz = dstz;
-   call.info.resource_copy_region.src = src;
-   call.info.resource_copy_region.src_level = src_level;
-   call.info.resource_copy_region.src_box = *src_box;
-
-   dd_before_draw(dctx);
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
+
+   record->call.type = CALL_RESOURCE_COPY_REGION;
+   record->call.info.resource_copy_region.dst = NULL;
+   pipe_resource_reference(&record->call.info.resource_copy_region.dst, dst); /* presumably gives the record its own reference */
+   record->call.info.resource_copy_region.dst_level = dst_level;
+   record->call.info.resource_copy_region.dstx = dstx;
+   record->call.info.resource_copy_region.dsty = dsty;
+   record->call.info.resource_copy_region.dstz = dstz;
+   record->call.info.resource_copy_region.src = NULL;
+   pipe_resource_reference(&record->call.info.resource_copy_region.src, src);
+   record->call.info.resource_copy_region.src_level = src_level;
+   record->call.info.resource_copy_region.src_box = *src_box; /* box is copied by value */
+
+   dd_before_draw(dctx, record);
    pipe->resource_copy_region(pipe,
                               dst, dst_level, dstx, dsty, dstz,
                               src, src_level, src_box);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_BLIT;
-   call.info.blit = *info;
+   record->call.type = CALL_BLIT;
+   record->call.info.blit = *info; /* shallow copy; dst/src resource pointers are re-installed below */
+   record->call.info.blit.dst.resource = NULL;
+   pipe_resource_reference(&record->call.info.blit.dst.resource, info->dst.resource); /* presumably gives the record its own reference */
+   record->call.info.blit.src.resource = NULL;
+   pipe_resource_reference(&record->call.info.blit.src.resource, info->src.resource);
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->blit(pipe, info);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static boolean
 dd_context_generate_mipmap(struct pipe_context *_pipe,
                            struct pipe_resource *res,
                            enum pipe_format format,
                            unsigned base_level,
                            unsigned last_level,
                            unsigned first_layer,
                            unsigned last_layer)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
    boolean result;
 
-   call.type = CALL_GENERATE_MIPMAP;
-   call.info.generate_mipmap.res = res;
-   call.info.generate_mipmap.format = format;
-   call.info.generate_mipmap.base_level = base_level;
-   call.info.generate_mipmap.last_level = last_level;
-   call.info.generate_mipmap.first_layer = first_layer;
-   call.info.generate_mipmap.last_layer = last_layer;
+   record->call.type = CALL_GENERATE_MIPMAP;
+   record->call.info.generate_mipmap.res = NULL;
+   pipe_resource_reference(&record->call.info.generate_mipmap.res, res); /* presumably gives the record its own reference */
+   record->call.info.generate_mipmap.format = format;
+   record->call.info.generate_mipmap.base_level = base_level;
+   record->call.info.generate_mipmap.last_level = last_level;
+   record->call.info.generate_mipmap.first_layer = first_layer;
+   record->call.info.generate_mipmap.last_layer = last_layer;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    result = pipe->generate_mipmap(pipe, res, format, base_level, last_level,
                                   first_layer, last_layer);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record); /* the driver's result is returned unchanged */
    return result;
 }
 
 static void
 dd_context_get_query_result_resource(struct pipe_context *_pipe,
                                      struct pipe_query *query,
                                      boolean wait,
                                      enum pipe_query_value_type result_type,
                                      int index,
                                      struct pipe_resource *resource,
                                      unsigned offset)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct dd_query *dquery = dd_query(query);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
-
-   call.type = CALL_GET_QUERY_RESULT_RESOURCE;
-   call.info.get_query_result_resource.query = query;
-   call.info.get_query_result_resource.wait = wait;
-   call.info.get_query_result_resource.result_type = result_type;
-   call.info.get_query_result_resource.index = index;
-   call.info.get_query_result_resource.resource = resource;
-   call.info.get_query_result_resource.offset = offset;
-
-   /* In pipelined mode, the query may be deleted by the time we need to
-    * print it.
-    */
-   call.info.get_query_result_resource.query_type = dquery->type;
-
-   dd_before_draw(dctx);
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
+
+   record->call.type = CALL_GET_QUERY_RESULT_RESOURCE;
+   record->call.info.get_query_result_resource.query = query; /* NOTE(review): raw pointer, no reference taken; may dangle once the query is deleted */
+   record->call.info.get_query_result_resource.wait = wait;
+   record->call.info.get_query_result_resource.result_type = result_type;
+   record->call.info.get_query_result_resource.index = index;
+   record->call.info.get_query_result_resource.resource = NULL;
+   pipe_resource_reference(&record->call.info.get_query_result_resource.resource,
+                           resource); /* presumably gives the record its own reference */
+   record->call.info.get_query_result_resource.offset = offset;
+
+   /* The query may be deleted by the time we need to print it. */
+   record->call.info.get_query_result_resource.query_type = dquery->type; /* so the type is copied out now */
+
+   dd_before_draw(dctx, record);
    pipe->get_query_result_resource(pipe, dquery->query, wait,
                                    result_type, index, resource, offset);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_flush_resource(struct pipe_context *_pipe,
                           struct pipe_resource *resource)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_FLUSH_RESOURCE;
-   call.info.flush_resource = resource;
+   record->call.type = CALL_FLUSH_RESOURCE;
+   record->call.info.flush_resource = NULL;
+   pipe_resource_reference(&record->call.info.flush_resource, resource); /* presumably gives the record its own reference */
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->flush_resource(pipe, resource);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_clear(struct pipe_context *_pipe, unsigned buffers,
                  const union pipe_color_union *color, double depth,
                  unsigned stencil)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_CLEAR;
-   call.info.clear.buffers = buffers;
-   call.info.clear.color = *color;
-   call.info.clear.depth = depth;
-   call.info.clear.stencil = stencil;
+   record->call.type = CALL_CLEAR;
+   record->call.info.clear.buffers = buffers;
+   record->call.info.clear.color = *color; /* color is copied by value; dereferences color unconditionally */
+   record->call.info.clear.depth = depth;
+   record->call.info.clear.stencil = stencil;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear(pipe, buffers, color, depth, stencil);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_clear_render_target(struct pipe_context *_pipe,
                                struct pipe_surface *dst,
                                const union pipe_color_union *color,
                                unsigned dstx, unsigned dsty,
                                unsigned width, unsigned height,
                                bool render_condition_enabled)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_CLEAR_RENDER_TARGET;
+   record->call.type = CALL_CLEAR_RENDER_TARGET; /* only the call type is recorded; the arguments are not captured */
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height,
                              render_condition_enabled);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_clear_depth_stencil(struct pipe_context *_pipe,
                                struct pipe_surface *dst, unsigned clear_flags,
                                double depth, unsigned stencil, unsigned dstx,
                                unsigned dsty, unsigned width, unsigned height,
                                bool render_condition_enabled)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_CLEAR_DEPTH_STENCIL;
+   record->call.type = CALL_CLEAR_DEPTH_STENCIL; /* only the call type is recorded; the arguments are not captured */
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil,
                              dstx, dsty, width, height,
                              render_condition_enabled);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
                         unsigned offset, unsigned size,
                         const void *clear_value, int clear_value_size)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_CLEAR_BUFFER;
-   call.info.clear_buffer.res = res;
-   call.info.clear_buffer.offset = offset;
-   call.info.clear_buffer.size = size;
-   call.info.clear_buffer.clear_value = clear_value;
-   call.info.clear_buffer.clear_value_size = clear_value_size;
+   record->call.type = CALL_CLEAR_BUFFER;
+   record->call.info.clear_buffer.res = NULL;
+   pipe_resource_reference(&record->call.info.clear_buffer.res, res); /* presumably gives the record its own reference */
+   record->call.info.clear_buffer.offset = offset;
+   record->call.info.clear_buffer.size = size;
+   record->call.info.clear_buffer.clear_value = clear_value; /* NOTE(review): stores the caller's pointer, not the bytes; confirm nothing reads it after this call returns */
+   record->call.info.clear_buffer.clear_value_size = clear_value_size;
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_buffer(pipe, res, offset, size, clear_value, clear_value_size);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 static void
 dd_context_clear_texture(struct pipe_context *_pipe,
                          struct pipe_resource *res,
                          unsigned level,
                          const struct pipe_box *box,
                          const void *data)
 {
    struct dd_context *dctx = dd_context(_pipe);
    struct pipe_context *pipe = dctx->pipe;
-   struct dd_call call;
+   struct dd_draw_record *record = dd_create_record(dctx); /* per-call record, handed to dd_after_draw below */
 
-   call.type = CALL_CLEAR_TEXTURE;
+   record->call.type = CALL_CLEAR_TEXTURE; /* only the call type is recorded; the arguments are not captured */
 
-   dd_before_draw(dctx);
+   dd_before_draw(dctx, record);
    pipe->clear_texture(pipe, res, level, box, data);
-   dd_after_draw(dctx, &call);
+   dd_after_draw(dctx, record);
 }
 
 void
 dd_init_draw_functions(struct dd_context *dctx)
 {
    CTX_INIT(flush);
    CTX_INIT(draw_vbo);
    CTX_INIT(launch_grid);
    CTX_INIT(resource_copy_region);
    CTX_INIT(blit);
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index 252dbffac86..d1965be9a14 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -26,36 +26,39 @@
  **************************************************************************/
 
 #ifndef DD_H_
 #define DD_H_
 
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "pipe/p_screen.h"
 #include "dd_util.h"
 #include "os/os_thread.h"
+#include "util/list.h"
 #include "util/u_log.h"
+#include "util/u_queue.h"
 
-enum dd_mode {
-   DD_DETECT_HANGS,
-   DD_DETECT_HANGS_PIPELINED,
+struct dd_context;
+
+enum dd_dump_mode { /* selects which draw calls get dumped */
+   DD_DUMP_ONLY_HANGS, /* default: dump only draw calls related to a detected hang */
    DD_DUMP_ALL_CALLS,
    DD_DUMP_APITRACE_CALL,
 };
 
 struct dd_screen
 {
    struct pipe_screen base;
    struct pipe_screen *screen;
    unsigned timeout_ms;
-   enum dd_mode mode;
-   bool no_flush;
+   enum dd_dump_mode dump_mode; /* from the 'always' / 'apitrace' options; DD_DUMP_ONLY_HANGS by default */
+   bool flush_always; /* set by the 'flush' option: flush after every draw call */
    bool verbose;
    unsigned skip_count;
    unsigned apitrace_dump_call;
 };
 
 enum call_type
 {
    CALL_DRAW_VBO,
    CALL_LAUNCH_GRID,
    CALL_RESOURCE_COPY_REGION,
@@ -211,27 +214,33 @@ struct dd_draw_state_copy
    struct dd_query render_cond;
    struct dd_state shaders[PIPE_SHADER_TYPES];
    struct dd_state sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
    struct dd_state velems;
    struct dd_state rs;
    struct dd_state dsa;
    struct dd_state blend;
 };
 
 struct dd_draw_record {
-   struct dd_draw_record *next;
+   struct list_head list; /* link in dd_context::records */
+   struct dd_context *dctx; /* presumably the context that created the record -- TODO confirm */
+
+   unsigned draw_call;
 
-   int64_t timestamp;
-   uint32_t sequence_no;
+   struct pipe_fence_handle *prev_bottom_of_pipe;
+   struct pipe_fence_handle *top_of_pipe;
+   struct pipe_fence_handle *bottom_of_pipe; /* filled by pipe->flush() in dd_after_draw when hang detection is enabled */
 
    struct dd_call call;
    struct dd_draw_state_copy draw_state;
+
+   struct util_queue_fence driver_finished; /* signalled by dd_after_draw_async */
    struct u_log_page *log_page;
 };
 
 struct dd_context
 {
    struct pipe_context base;
    struct pipe_context *pipe;
 
    struct dd_draw_state draw_state;
    unsigned num_draw_calls;
@@ -245,41 +254,43 @@ struct dd_context
     * are not used.
     *
     * After each draw call, a new dd_draw_record is created that contains
     * a copy of all states, the output of pipe_context::dump_debug_state,
     * and it has a fence number assigned. That's done without knowing whether
     * that draw call is problematic or not. The record is added into the list
     * of all records.
     *
     * An independent, separate thread loops over the list of records and checks
     * their fences. Records with signalled fences are freed. On fence timeout,
-    * the thread dumps the record of the oldest unsignalled fence.
+    * the thread dumps the records of in-flight draws.
     */
    thrd_t thread;
    mtx_t mutex;
-   int kill_thread;
-   struct pipe_resource *fence;
-   struct pipe_transfer *fence_transfer;
-   uint32_t *mapped_fence;
-   uint32_t sequence_no;
-   struct dd_draw_record *records;
-   int max_log_buffer_size;
+   cnd_t cond;
+   struct dd_draw_record *record_pending; /* currently inside the driver */
+   struct list_head records; /* oldest record first */
+   unsigned num_records;
+   bool kill_thread;
+   bool api_stalled;
 };
 
 
 struct pipe_context *
 dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
 
 void
 dd_init_draw_functions(struct dd_context *dctx);
+
+void
+dd_thread_join(struct dd_context *dctx);
 int
-dd_thread_pipelined_hang_detect(void *input);
+dd_thread_main(void *input);
 
 FILE *
 dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number);
 
 static inline struct dd_context *
 dd_context(struct pipe_context *pipe)
 {
    return (struct dd_context *)pipe;
 }
 
diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c
index caf31f6df0f..11d1d8c1e9c 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -21,20 +21,21 @@
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
 
 #include "dd_pipe.h"
 #include "dd_public.h"
 #include "util/u_memory.h"
+#include <ctype.h>
 #include <stdio.h>
 
 
 static const char *
 dd_screen_get_name(struct pipe_screen *_screen)
 {
    struct pipe_screen *screen = dd_screen(_screen)->screen;
 
    return screen->get_name(screen);
 }
@@ -374,86 +375,143 @@ dd_screen_memobj_destroy(struct pipe_screen *_screen,
 static void
 dd_screen_destroy(struct pipe_screen *_screen)
 {
    struct dd_screen *dscreen = dd_screen(_screen);
    struct pipe_screen *screen = dscreen->screen;
 
    screen->destroy(screen);
    FREE(dscreen);
 }
 
+static void
+skip_space(const char **p) /* advance *p past any leading whitespace */
+{
+   while (isspace((unsigned char)**p)) /* cast: <ctype.h> functions are undefined for negative char values */
+      (*p)++;
+}
+
+static bool
+match_word(const char **cur, const char *word) /* consume 'word' at *cur if it is a complete token; true on match */
+{
+   size_t len = strlen(word);
+   if (strncmp(*cur, word, len) != 0)
+      return false;
+
+   const char *p = *cur + len;
+   if (*p) {
+      if (!isspace((unsigned char)*p)) /* cast: <ctype.h> functions are undefined for negative char values */
+         return false; /* 'word' was only a prefix of a longer token */
+
+      *cur = p + 1; /* also consume the single separator */
+   } else {
+      *cur = p; /* token ends the string; stay on the terminator */
+   }
+
+   return true;
+}
+
+static bool
+match_uint(const char **cur, unsigned *value) /* parse an unsigned token at *cur (base auto-detected); *cur and *value untouched on failure */
+{
+   char *end;
+   unsigned long v = strtoul(*cur, &end, 0); /* keep the full width so truncation can be detected below */
+   if (end == *cur || (*end && !isspace((unsigned char)*end)) || v != (unsigned)v)
+      return false; /* no digits, trailing junk, or a value that does not fit in unsigned */
+   *cur = end;
+   *value = (unsigned)v;
+   return true;
+}
+
 struct pipe_screen *
 ddebug_screen_create(struct pipe_screen *screen)
 {
    struct dd_screen *dscreen;
    const char *option;
-   bool no_flush;
-   unsigned timeout = 0;
+   bool flush = false;
+   bool verbose = false;
+   unsigned timeout = 1000;
    unsigned apitrace_dump_call = 0;
-   enum dd_mode mode;
+   enum dd_dump_mode mode = DD_DUMP_ONLY_HANGS;
 
    option = debug_get_option("GALLIUM_DDEBUG", NULL);
    if (!option)
       return screen;
 
    if (!strcmp(option, "help")) {
       puts("Gallium driver debugger");
       puts("");
       puts("Usage:");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"always [noflush] [verbose]\"");
-      puts("    Flush and dump context and driver information after every draw call into");
-      puts("    $HOME/"DD_DIR"/.");
+      puts("  GALLIUM_DDEBUG=\"[<timeout in ms>] [(always|apitrace <call#>)] [flush] [verbose]\"");
+      puts("  GALLIUM_DDEBUG_SKIP=[count]");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"[timeout in ms] [noflush] [verbose]\"");
-      puts("    Flush and detect a device hang after every draw call based on the given");
-      puts("    fence timeout and dump context and driver information into");
-      puts("    $HOME/"DD_DIR"/ when a hang is detected.");
+      puts("Dump context and driver information of draw calls into");
+      puts("$HOME/"DD_DIR"/. By default, watch for GPU hangs and only dump information");
+      puts("about draw calls related to the hang.");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"pipelined [timeout in ms] [verbose]\"");
-      puts("    Detect a device hang after every draw call based on the given fence");
-      puts("    timeout without flushes and dump context and driver information into");
-      puts("    $HOME/"DD_DIR"/ when a hang is detected.");
+      puts("<timeout in ms>");
+      puts("  Change the default timeout for GPU hang detection (default=1000ms).");
+      puts("  Setting this to 0 will disable GPU hang detection entirely.");
       puts("");
-      puts("  GALLIUM_DDEBUG=\"apitrace [call#] [verbose]\"");
-      puts("    Dump apitrace draw call information into $HOME/"DD_DIR"/. Implies 'noflush'.");
+      puts("always");
+      puts("  Dump information about all draw calls.");
       puts("");
-      puts("  If 'noflush' is specified, do not flush on every draw call. In hang");
-      puts("  detection mode, this only detect hangs in pipe->flush.");
-      puts("  If 'verbose' is specified, additional information is written to stderr.");
+      puts("apitrace <call#>");
+      puts("  Dump information about the draw call corresponding to the given");
+      puts("  apitrace call number and exit.");
       puts("");
-      puts("  GALLIUM_DDEBUG_SKIP=[count]");
-      puts("    Skip flush and hang detection for the given initial number of draw calls.");
+      puts("flush");
+      puts("  Flush after every draw call.");
+      puts("");
+      puts("verbose");
+      puts("  Write additional information to stderr.");
+      puts("");
+      puts("GALLIUM_DDEBUG_SKIP=count");
+      puts("  Skip dumping on the first count draw calls (only relevant with 'always').");
       puts("");
       exit(0);
    }
 
-   no_flush = strstr(option, "noflush") != NULL;
-
-   if (!strncmp(option, "always", 6)) {
-      mode = DD_DUMP_ALL_CALLS;
-   } else if (!strncmp(option, "apitrace", 8)) {
-      mode = DD_DUMP_APITRACE_CALL;
-      no_flush = true;
-
-      if (sscanf(option+8, "%u", &apitrace_dump_call) != 1)
-         return screen;
-   } else if (!strncmp(option, "pipelined", 9)) {
-      mode = DD_DETECT_HANGS_PIPELINED;
-
-      if (sscanf(option+10, "%u", &timeout) != 1)
-         return screen;
-   } else {
-      mode = DD_DETECT_HANGS;
-
-      if (sscanf(option, "%u", &timeout) != 1)
-         return screen;
+   for (;;) { /* consume GALLIUM_DDEBUG one whitespace-separated token per iteration */
+      skip_space(&option);
+      if (!*option)
+         break;
+
+      if (match_word(&option, "always")) {
+         if (mode == DD_DUMP_APITRACE_CALL) {
+            fprintf(stderr, "ddebug: both 'always' and 'apitrace' specified\n"); /* diagnostics go to stderr like the rest of this file */
+            exit(1);
+         }
+
+         mode = DD_DUMP_ALL_CALLS;
+      } else if (match_word(&option, "flush")) {
+         flush = true;
+      } else if (match_word(&option, "verbose")) {
+         verbose = true;
+      } else if (match_word(&option, "apitrace")) {
+         if (mode != DD_DUMP_ONLY_HANGS) {
+            fprintf(stderr, "ddebug: 'apitrace' can only appear once and not mixed with 'always'\n");
+            exit(1);
+         }
+
+         if (!match_uint(&option, &apitrace_dump_call)) {
+            fprintf(stderr, "ddebug: expected call number after 'apitrace'\n");
+            exit(1);
+         }
+
+         mode = DD_DUMP_APITRACE_CALL;
+      } else if (match_uint(&option, &timeout)) { /* a bare number is the hang-detection timeout in ms */
+         /* no-op */
+      } else {
+         fprintf(stderr, "ddebug: bad options: %s\n", option);
+         exit(1);
+      }
    }
 
    dscreen = CALLOC_STRUCT(dd_screen);
    if (!dscreen)
       return NULL;
 
 #define SCR_INIT(_member) \
    dscreen->base._member = screen->_member ? dd_screen_##_member : NULL
 
    dscreen->base.destroy = dd_screen_destroy;
@@ -489,39 +547,40 @@ ddebug_screen_create(struct pipe_screen *screen)
    SCR_INIT(get_driver_query_info);
    SCR_INIT(get_driver_query_group_info);
    SCR_INIT(get_compiler_options);
    SCR_INIT(get_driver_uuid);
    SCR_INIT(get_device_uuid);
 
 #undef SCR_INIT
 
    dscreen->screen = screen;
    dscreen->timeout_ms = timeout;
-   dscreen->mode = mode;
-   dscreen->no_flush = no_flush;
-   dscreen->verbose = strstr(option, "verbose") != NULL;
+   dscreen->dump_mode = mode;
+   dscreen->flush_always = flush;
+   dscreen->verbose = verbose;
    dscreen->apitrace_dump_call = apitrace_dump_call;
 
-   switch (dscreen->mode) {
+   switch (dscreen->dump_mode) {
    case DD_DUMP_ALL_CALLS:
       fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
       break;
-   case DD_DETECT_HANGS:
-   case DD_DETECT_HANGS_PIPELINED:
-      fprintf(stderr, "Gallium debugger active. "
-              "The hang detection timeout is %i ms.\n", timeout);
-      break;
    case DD_DUMP_APITRACE_CALL:
       fprintf(stderr, "Gallium debugger active. Going to dump an apitrace call.\n");
       break;
    default:
-      assert(0);
+      fprintf(stderr, "Gallium debugger active.\n");
+      break;
    }
 
+   if (dscreen->timeout_ms > 0)
+      fprintf(stderr, "Hang detection timeout is %ums.\n", dscreen->timeout_ms);
+   else
+      fprintf(stderr, "Hang detection is disabled.\n");
+
    dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
    if (dscreen->skip_count > 0) {
       fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",
               dscreen->skip_count);
    }
 
    return &dscreen->base;
 }
-- 
2.11.0



More information about the mesa-dev mailing list