<div dir="ltr"><div>For the series:</div><div><br></div><div><div>Reviewed-by: Marek Olšák <<a href="mailto:marek.olsak@amd.com">marek.olsak@amd.com</a>></div><div><br></div><div>Marek</div></div></div><br><div class="gmail_quote"><div dir="ltr">On Thu, Dec 6, 2018 at 7:57 AM Nicolai Hähnle <<a href="mailto:nhaehnle@gmail.com">nhaehnle@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Nicolai Hähnle <<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>><br>
<br>
The following race condition could occur in the no-timeout case:<br>
<br>
API thread Gallium thread Watchdog<br>
---------- -------------- --------<br>
dd_before_draw<br>
u_threaded_context draw<br>
dd_after_draw<br>
add to dctx->records<br>
signal watchdog<br>
dump & destroy record<br>
execute draw<br>
dd_after_draw_async<br>
use-after-free!<br>
<br>
Alternatively, the same scenario would assert in a debug build when<br>
destroying the record because record->driver_finished has not signaled.<br>
<br>
Fix this and simplify the logic at the same time by<br>
- handing the record pointers off to the watchdog thread *before* each<br>
draw call and<br>
- waiting on the driver_finished fence in the watchdog thread.<br>
---<br>
.../auxiliary/driver_ddebug/dd_context.c | 1 -<br>
src/gallium/auxiliary/driver_ddebug/dd_draw.c | 103 ++++++++----------<br>
src/gallium/auxiliary/driver_ddebug/dd_pipe.h | 21 ++--<br>
3 files changed, 52 insertions(+), 73 deletions(-)<br>
<br>
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_context.c b/src/gallium/auxiliary/driver_ddebug/dd_context.c<br>
index a9ac6ef14ab..15efeccf879 100644<br>
--- a/src/gallium/auxiliary/driver_ddebug/dd_context.c<br>
+++ b/src/gallium/auxiliary/driver_ddebug/dd_context.c<br>
@@ -589,21 +589,20 @@ static void<br>
dd_context_destroy(struct pipe_context *_pipe)<br>
{<br>
struct dd_context *dctx = dd_context(_pipe);<br>
struct pipe_context *pipe = dctx->pipe;<br>
<br>
dd_thread_join(dctx);<br>
mtx_destroy(&dctx->mutex);<br>
cnd_destroy(&dctx->cond);<br>
<br>
assert(list_empty(&dctx->records));<br>
- assert(!dctx->record_pending);<br>
<br>
if (pipe->set_log_context) {<br>
pipe->set_log_context(pipe, NULL);<br>
<br>
if (dd_screen(dctx->base.screen)->dump_mode == DD_DUMP_ALL_CALLS) {<br>
FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);<br>
if (f) {<br>
fprintf(f, "Remainder of driver log:\n\n");<br>
}<br>
<br>
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_draw.c b/src/gallium/auxiliary/driver_ddebug/dd_draw.c<br>
index cb5db8ab83b..a930299ebb7 100644<br>
--- a/src/gallium/auxiliary/driver_ddebug/dd_draw.c<br>
+++ b/src/gallium/auxiliary/driver_ddebug/dd_draw.c<br>
@@ -981,80 +981,75 @@ dd_report_hang(struct dd_context *dctx)<br>
}<br>
<br>
fclose(f);<br>
}<br>
<br>
if (top_not_reached)<br>
stop_output = true;<br>
encountered_hang = true;<br>
}<br>
<br>
- if (num_later || dctx->record_pending) {<br>
- fprintf(stderr, "... and %u%s additional draws.\n", num_later,<br>
- dctx->record_pending ? "+1 (pending)" : "");<br>
- }<br>
+ if (num_later)<br>
+ fprintf(stderr, "... and %u additional draws.\n", num_later);<br>
<br>
fprintf(stderr, "\nDone.\n");<br>
dd_kill_process();<br>
}<br>
<br>
int<br>
dd_thread_main(void *input)<br>
{<br>
struct dd_context *dctx = (struct dd_context *)input;<br>
struct dd_screen *dscreen = dd_screen(dctx->base.screen);<br>
struct pipe_screen *screen = dscreen->screen;<br>
<br>
mtx_lock(&dctx->mutex);<br>
<br>
for (;;) {<br>
struct list_head records;<br>
- struct pipe_fence_handle *fence;<br>
- struct pipe_fence_handle *fence2 = NULL;<br>
-<br>
list_replace(&dctx->records, &records);<br>
list_inithead(&dctx->records);<br>
dctx->num_records = 0;<br>
<br>
if (dctx->api_stalled)<br>
cnd_signal(&dctx->cond);<br>
<br>
- if (!list_empty(&records)) {<br>
- /* Wait for the youngest draw. This means hangs can take a bit longer<br>
- * to detect, but it's more efficient this way. */<br>
- struct dd_draw_record *youngest =<br>
- LIST_ENTRY(struct dd_draw_record, records.prev, list);<br>
- fence = youngest->bottom_of_pipe;<br>
- } else if (dctx->record_pending) {<br>
- /* Wait for pending fences, in case the driver ends up hanging internally. */<br>
- fence = dctx->record_pending->prev_bottom_of_pipe;<br>
- fence2 = dctx->record_pending->top_of_pipe;<br>
- } else if (dctx->kill_thread) {<br>
- break;<br>
- } else {<br>
+ if (list_empty(&records)) {<br>
+ if (dctx->kill_thread)<br>
+ break;<br>
+<br>
cnd_wait(&dctx->cond, &dctx->mutex);<br>
continue;<br>
}<br>
+<br>
mtx_unlock(&dctx->mutex);<br>
<br>
- /* Fences can be NULL legitimately when timeout detection is disabled. */<br>
- if ((fence &&<br>
- !screen->fence_finish(screen, NULL, fence,<br>
- (uint64_t)dscreen->timeout_ms * 1000*1000)) ||<br>
- (fence2 &&<br>
- !screen->fence_finish(screen, NULL, fence2,<br>
- (uint64_t)dscreen->timeout_ms * 1000*1000))) {<br>
- mtx_lock(&dctx->mutex);<br>
- list_splice(&records, &dctx->records);<br>
- dd_report_hang(dctx);<br>
- /* we won't actually get here */<br>
- mtx_unlock(&dctx->mutex);<br>
+ /* Wait for the youngest draw. This means hangs can take a bit longer<br>
+ * to detect, but it's more efficient this way. */<br>
+ struct dd_draw_record *youngest =<br>
+ list_last_entry(&records, struct dd_draw_record, list);<br>
+<br>
+ if (dscreen->timeout_ms > 0) {<br>
+ uint64_t abs_timeout = os_time_get_absolute_timeout(<br>
+ (uint64_t)dscreen->timeout_ms * 1000*1000);<br>
+<br>
+ if (!util_queue_fence_wait_timeout(&youngest->driver_finished, abs_timeout) ||<br>
+ !screen->fence_finish(screen, NULL, youngest->bottom_of_pipe,<br>
+ (uint64_t)dscreen->timeout_ms * 1000*1000)) {<br>
+ mtx_lock(&dctx->mutex);<br>
+ list_splice(&records, &dctx->records);<br>
+ dd_report_hang(dctx);<br>
+ /* we won't actually get here */<br>
+ mtx_unlock(&dctx->mutex);<br>
+ }<br>
+ } else {<br>
+ util_queue_fence_wait(&youngest->driver_finished);<br>
}<br>
<br>
list_for_each_entry_safe(struct dd_draw_record, record, &records, list) {<br>
dd_maybe_dump_record(dscreen, record);<br>
list_del(&record->list);<br>
dd_free_record(screen, record);<br>
}<br>
<br>
mtx_lock(&dctx->mutex);<br>
}<br>
@@ -1072,20 +1067,21 @@ dd_create_record(struct dd_context *dctx)<br>
return NULL;<br>
<br>
record->dctx = dctx;<br>
record->draw_call = dctx->num_draw_calls;<br>
<br>
record->prev_bottom_of_pipe = NULL;<br>
record->top_of_pipe = NULL;<br>
record->bottom_of_pipe = NULL;<br>
record->log_page = NULL;<br>
util_queue_fence_init(&record->driver_finished);<br>
+ util_queue_fence_reset(&record->driver_finished);<br>
<br>
dd_init_copy_of_draw_state(&record->draw_state);<br>
dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);<br>
<br>
return record;<br>
}<br>
<br>
static void<br>
dd_context_flush(struct pipe_context *_pipe,<br>
struct pipe_fence_handle **fence, unsigned flags)<br>
@@ -1108,41 +1104,50 @@ dd_before_draw(struct dd_context *dctx, struct dd_draw_record *record)<br>
if (dscreen->timeout_ms > 0) {<br>
if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count) {<br>
pipe->flush(pipe, &record->prev_bottom_of_pipe, 0);<br>
screen->fence_reference(screen, &record->top_of_pipe, record->prev_bottom_of_pipe);<br>
} else {<br>
pipe->flush(pipe, &record->prev_bottom_of_pipe,<br>
PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE);<br>
pipe->flush(pipe, &record->top_of_pipe,<br>
PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE);<br>
}<br>
+ }<br>
<br>
- mtx_lock(&dctx->mutex);<br>
- dctx->record_pending = record;<br>
- if (list_empty(&dctx->records))<br>
- cnd_signal(&dctx->cond);<br>
- mtx_unlock(&dctx->mutex);<br>
+ mtx_lock(&dctx->mutex);<br>
+ if (unlikely(dctx->num_records > 10000)) {<br>
+ dctx->api_stalled = true;<br>
+ /* Since this is only a heuristic to prevent the API thread from getting<br>
+ * too far ahead, we don't need a loop here. */<br>
+ cnd_wait(&dctx->cond, &dctx->mutex);<br>
+ dctx->api_stalled = false;<br>
}<br>
+<br>
+ if (list_empty(&dctx->records))<br>
+ cnd_signal(&dctx->cond);<br>
+<br>
+ list_addtail(&record->list, &dctx->records);<br>
+ dctx->num_records++;<br>
+ mtx_unlock(&dctx->mutex);<br>
}<br>
<br>
static void<br>
dd_after_draw_async(void *data)<br>
{<br>
struct dd_draw_record *record = (struct dd_draw_record *)data;<br>
struct dd_context *dctx = record->dctx;<br>
struct dd_screen *dscreen = dd_screen(dctx->base.screen);<br>
<br>
record->log_page = u_log_new_page(&dctx->log);<br>
record->time_after = os_time_get_nano();<br>
<br>
- if (!util_queue_fence_is_signalled(&record->driver_finished))<br>
- util_queue_fence_signal(&record->driver_finished);<br>
+ util_queue_fence_signal(&record->driver_finished);<br>
<br>
if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&<br>
dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) {<br>
dd_thread_join(dctx);<br>
/* No need to continue. */<br>
exit(0);<br>
}<br>
}<br>
<br>
static void<br>
@@ -1151,48 +1156,28 @@ dd_after_draw(struct dd_context *dctx, struct dd_draw_record *record)<br>
struct dd_screen *dscreen = dd_screen(dctx->base.screen);<br>
struct pipe_context *pipe = dctx->pipe;<br>
<br>
if (dscreen->timeout_ms > 0) {<br>
unsigned flush_flags;<br>
if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count)<br>
flush_flags = 0;<br>
else<br>
flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE;<br>
pipe->flush(pipe, &record->bottom_of_pipe, flush_flags);<br>
-<br>
- assert(record == dctx->record_pending);<br>
}<br>
<br>
if (pipe->callback) {<br>
- util_queue_fence_reset(&record->driver_finished);<br>
pipe->callback(pipe, dd_after_draw_async, record, true);<br>
} else {<br>
dd_after_draw_async(record);<br>
}<br>
<br>
- mtx_lock(&dctx->mutex);<br>
- if (unlikely(dctx->num_records > 10000)) {<br>
- dctx->api_stalled = true;<br>
- /* Since this is only a heuristic to prevent the API thread from getting<br>
- * too far ahead, we don't need a loop here. */<br>
- cnd_wait(&dctx->cond, &dctx->mutex);<br>
- dctx->api_stalled = false;<br>
- }<br>
-<br>
- if (list_empty(&dctx->records))<br>
- cnd_signal(&dctx->cond);<br>
-<br>
- list_addtail(&record->list, &dctx->records);<br>
- dctx->record_pending = NULL;<br>
- dctx->num_records++;<br>
- mtx_unlock(&dctx->mutex);<br>
-<br>
++dctx->num_draw_calls;<br>
if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)<br>
fprintf(stderr, "Gallium debugger reached %u draw calls.\n",<br>
dctx->num_draw_calls);<br>
}<br>
<br>
static void<br>
dd_context_draw_vbo(struct pipe_context *_pipe,<br>
const struct pipe_draw_info *info)<br>
{<br>
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h<br>
index 07c4d55017f..12da8280aa6 100644<br>
--- a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h<br>
+++ b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h<br>
@@ -267,20 +267,21 @@ struct dd_draw_state_copy<br>
};<br>
<br>
struct dd_draw_record {<br>
struct list_head list;<br>
struct dd_context *dctx;<br>
<br>
int64_t time_before;<br>
int64_t time_after;<br>
unsigned draw_call;<br>
<br>
+ /* The fence pointers are guaranteed to be valid once driver_finished is signalled */<br>
struct pipe_fence_handle *prev_bottom_of_pipe;<br>
struct pipe_fence_handle *top_of_pipe;<br>
struct pipe_fence_handle *bottom_of_pipe;<br>
<br>
struct dd_call call;<br>
struct dd_draw_state_copy draw_state;<br>
<br>
struct util_queue_fence driver_finished;<br>
struct u_log_page *log_page;<br>
};<br>
@@ -290,38 +291,32 @@ struct dd_context<br>
struct pipe_context base;<br>
struct pipe_context *pipe;<br>
<br>
struct dd_draw_state draw_state;<br>
unsigned num_draw_calls;<br>
<br>
struct u_log_context log;<br>
<br>
/* Pipelined hang detection.<br>
*<br>
- * This is without unnecessary flushes and waits. There is a memory-based<br>
- * fence that is incremented by clear_buffer every draw call. Driver fences<br>
- * are not used.<br>
+ * Before each draw call, a new dd_draw_record is created that contains<br>
+ * a copy of all states. After each draw call, the driver's log is added<br>
+ * to this record. Additionally, deferred fences are associated to each<br>
+ * record both before and after the draw.<br>
*<br>
- * After each draw call, a new dd_draw_record is created that contains<br>
- * a copy of all states, the output of pipe_context::dump_debug_state,<br>
- * and it has a fence number assigned. That's done without knowing whether<br>
- * that draw call is problematic or not. The record is added into the list<br>
- * of all records.<br>
- *<br>
- * An independent, separate thread loops over the list of records and checks<br>
- * their fences. Records with signalled fences are freed. On fence timeout,<br>
- * the thread dumps the records of in-flight draws.<br>
+ * The records are handed off to a separate thread which waits on the<br>
+ * records' fences. Records with signalled fences are freed. When a timeout<br>
+ * is detected, the thread dumps the records of in-flight draws.<br>
*/<br>
thrd_t thread;<br>
mtx_t mutex;<br>
cnd_t cond;<br>
- struct dd_draw_record *record_pending; /* currently inside the driver */<br>
struct list_head records; /* oldest record first */<br>
unsigned num_records;<br>
bool kill_thread;<br>
bool api_stalled;<br>
};<br>
<br>
<br>
struct pipe_context *<br>
dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);<br>
<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>