[Mesa-dev] [PATCH 1/2] ddebug: simplify watchdog loop and fix crash in the no-timeout case
Nicolai Hähnle
nhaehnle at gmail.com
Thu Dec 6 12:57:34 UTC 2018
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
The following race condition could occur in the no-timeout case:
  API thread               Gallium thread           Watchdog
  ----------               --------------           --------
  dd_before_draw
  u_threaded_context draw
  dd_after_draw
    add to dctx->records
    signal watchdog
                                                    dump & destroy record
                           execute draw
                           dd_after_draw_async
                             use-after-free!
Alternatively, the same scenario would assert in a debug build when
destroying the record because record->driver_finished has not been signaled.
Fix this and simplify the logic at the same time by
- handing the record pointers off to the watchdog thread *before* each
draw call and
- waiting on the driver_finished fence in the watchdog thread
---
.../auxiliary/driver_ddebug/dd_context.c | 1 -
src/gallium/auxiliary/driver_ddebug/dd_draw.c | 103 ++++++++----------
src/gallium/auxiliary/driver_ddebug/dd_pipe.h | 21 ++--
3 files changed, 52 insertions(+), 73 deletions(-)
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_context.c b/src/gallium/auxiliary/driver_ddebug/dd_context.c
index a9ac6ef14ab..15efeccf879 100644
--- a/src/gallium/auxiliary/driver_ddebug/dd_context.c
+++ b/src/gallium/auxiliary/driver_ddebug/dd_context.c
@@ -589,21 +589,20 @@ static void
dd_context_destroy(struct pipe_context *_pipe)
{
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
dd_thread_join(dctx);
mtx_destroy(&dctx->mutex);
cnd_destroy(&dctx->cond);
assert(list_empty(&dctx->records));
- assert(!dctx->record_pending);
if (pipe->set_log_context) {
pipe->set_log_context(pipe, NULL);
if (dd_screen(dctx->base.screen)->dump_mode == DD_DUMP_ALL_CALLS) {
FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);
if (f) {
fprintf(f, "Remainder of driver log:\n\n");
}
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_draw.c b/src/gallium/auxiliary/driver_ddebug/dd_draw.c
index cb5db8ab83b..a930299ebb7 100644
--- a/src/gallium/auxiliary/driver_ddebug/dd_draw.c
+++ b/src/gallium/auxiliary/driver_ddebug/dd_draw.c
@@ -981,80 +981,75 @@ dd_report_hang(struct dd_context *dctx)
}
fclose(f);
}
if (top_not_reached)
stop_output = true;
encountered_hang = true;
}
- if (num_later || dctx->record_pending) {
- fprintf(stderr, "... and %u%s additional draws.\n", num_later,
- dctx->record_pending ? "+1 (pending)" : "");
- }
+ if (num_later)
+ fprintf(stderr, "... and %u additional draws.\n", num_later);
fprintf(stderr, "\nDone.\n");
dd_kill_process();
}
int
dd_thread_main(void *input)
{
struct dd_context *dctx = (struct dd_context *)input;
struct dd_screen *dscreen = dd_screen(dctx->base.screen);
struct pipe_screen *screen = dscreen->screen;
mtx_lock(&dctx->mutex);
for (;;) {
struct list_head records;
- struct pipe_fence_handle *fence;
- struct pipe_fence_handle *fence2 = NULL;
-
list_replace(&dctx->records, &records);
list_inithead(&dctx->records);
dctx->num_records = 0;
if (dctx->api_stalled)
cnd_signal(&dctx->cond);
- if (!list_empty(&records)) {
- /* Wait for the youngest draw. This means hangs can take a bit longer
- * to detect, but it's more efficient this way. */
- struct dd_draw_record *youngest =
- LIST_ENTRY(struct dd_draw_record, records.prev, list);
- fence = youngest->bottom_of_pipe;
- } else if (dctx->record_pending) {
- /* Wait for pending fences, in case the driver ends up hanging internally. */
- fence = dctx->record_pending->prev_bottom_of_pipe;
- fence2 = dctx->record_pending->top_of_pipe;
- } else if (dctx->kill_thread) {
- break;
- } else {
+ if (list_empty(&records)) {
+ if (dctx->kill_thread)
+ break;
+
cnd_wait(&dctx->cond, &dctx->mutex);
continue;
}
+
mtx_unlock(&dctx->mutex);
- /* Fences can be NULL legitimately when timeout detection is disabled. */
- if ((fence &&
- !screen->fence_finish(screen, NULL, fence,
- (uint64_t)dscreen->timeout_ms * 1000*1000)) ||
- (fence2 &&
- !screen->fence_finish(screen, NULL, fence2,
- (uint64_t)dscreen->timeout_ms * 1000*1000))) {
- mtx_lock(&dctx->mutex);
- list_splice(&records, &dctx->records);
- dd_report_hang(dctx);
- /* we won't actually get here */
- mtx_unlock(&dctx->mutex);
+ /* Wait for the youngest draw. This means hangs can take a bit longer
+ * to detect, but it's more efficient this way. */
+ struct dd_draw_record *youngest =
+ list_last_entry(&records, struct dd_draw_record, list);
+
+ if (dscreen->timeout_ms > 0) {
+ uint64_t abs_timeout = os_time_get_absolute_timeout(
+ (uint64_t)dscreen->timeout_ms * 1000*1000);
+
+ if (!util_queue_fence_wait_timeout(&youngest->driver_finished, abs_timeout) ||
+ !screen->fence_finish(screen, NULL, youngest->bottom_of_pipe,
+ (uint64_t)dscreen->timeout_ms * 1000*1000)) {
+ mtx_lock(&dctx->mutex);
+ list_splice(&records, &dctx->records);
+ dd_report_hang(dctx);
+ /* we won't actually get here */
+ mtx_unlock(&dctx->mutex);
+ }
+ } else {
+ util_queue_fence_wait(&youngest->driver_finished);
}
list_for_each_entry_safe(struct dd_draw_record, record, &records, list) {
dd_maybe_dump_record(dscreen, record);
list_del(&record->list);
dd_free_record(screen, record);
}
mtx_lock(&dctx->mutex);
}
@@ -1072,20 +1067,21 @@ dd_create_record(struct dd_context *dctx)
return NULL;
record->dctx = dctx;
record->draw_call = dctx->num_draw_calls;
record->prev_bottom_of_pipe = NULL;
record->top_of_pipe = NULL;
record->bottom_of_pipe = NULL;
record->log_page = NULL;
util_queue_fence_init(&record->driver_finished);
+ util_queue_fence_reset(&record->driver_finished);
dd_init_copy_of_draw_state(&record->draw_state);
dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);
return record;
}
static void
dd_context_flush(struct pipe_context *_pipe,
struct pipe_fence_handle **fence, unsigned flags)
@@ -1108,41 +1104,50 @@ dd_before_draw(struct dd_context *dctx, struct dd_draw_record *record)
if (dscreen->timeout_ms > 0) {
if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count) {
pipe->flush(pipe, &record->prev_bottom_of_pipe, 0);
screen->fence_reference(screen, &record->top_of_pipe, record->prev_bottom_of_pipe);
} else {
pipe->flush(pipe, &record->prev_bottom_of_pipe,
PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE);
pipe->flush(pipe, &record->top_of_pipe,
PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE);
}
+ }
- mtx_lock(&dctx->mutex);
- dctx->record_pending = record;
- if (list_empty(&dctx->records))
- cnd_signal(&dctx->cond);
- mtx_unlock(&dctx->mutex);
+ mtx_lock(&dctx->mutex);
+ if (unlikely(dctx->num_records > 10000)) {
+ dctx->api_stalled = true;
+ /* Since this is only a heuristic to prevent the API thread from getting
+ * too far ahead, we don't need a loop here. */
+ cnd_wait(&dctx->cond, &dctx->mutex);
+ dctx->api_stalled = false;
}
+
+ if (list_empty(&dctx->records))
+ cnd_signal(&dctx->cond);
+
+ list_addtail(&record->list, &dctx->records);
+ dctx->num_records++;
+ mtx_unlock(&dctx->mutex);
}
static void
dd_after_draw_async(void *data)
{
struct dd_draw_record *record = (struct dd_draw_record *)data;
struct dd_context *dctx = record->dctx;
struct dd_screen *dscreen = dd_screen(dctx->base.screen);
record->log_page = u_log_new_page(&dctx->log);
record->time_after = os_time_get_nano();
- if (!util_queue_fence_is_signalled(&record->driver_finished))
- util_queue_fence_signal(&record->driver_finished);
+ util_queue_fence_signal(&record->driver_finished);
if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&
dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) {
dd_thread_join(dctx);
/* No need to continue. */
exit(0);
}
}
static void
@@ -1151,48 +1156,28 @@ dd_after_draw(struct dd_context *dctx, struct dd_draw_record *record)
struct dd_screen *dscreen = dd_screen(dctx->base.screen);
struct pipe_context *pipe = dctx->pipe;
if (dscreen->timeout_ms > 0) {
unsigned flush_flags;
if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count)
flush_flags = 0;
else
flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE;
pipe->flush(pipe, &record->bottom_of_pipe, flush_flags);
-
- assert(record == dctx->record_pending);
}
if (pipe->callback) {
- util_queue_fence_reset(&record->driver_finished);
pipe->callback(pipe, dd_after_draw_async, record, true);
} else {
dd_after_draw_async(record);
}
- mtx_lock(&dctx->mutex);
- if (unlikely(dctx->num_records > 10000)) {
- dctx->api_stalled = true;
- /* Since this is only a heuristic to prevent the API thread from getting
- * too far ahead, we don't need a loop here. */
- cnd_wait(&dctx->cond, &dctx->mutex);
- dctx->api_stalled = false;
- }
-
- if (list_empty(&dctx->records))
- cnd_signal(&dctx->cond);
-
- list_addtail(&record->list, &dctx->records);
- dctx->record_pending = NULL;
- dctx->num_records++;
- mtx_unlock(&dctx->mutex);
-
++dctx->num_draw_calls;
if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
dctx->num_draw_calls);
}
static void
dd_context_draw_vbo(struct pipe_context *_pipe,
const struct pipe_draw_info *info)
{
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h
index 07c4d55017f..12da8280aa6 100644
--- a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h
+++ b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h
@@ -267,20 +267,21 @@ struct dd_draw_state_copy
};
struct dd_draw_record {
struct list_head list;
struct dd_context *dctx;
int64_t time_before;
int64_t time_after;
unsigned draw_call;
+ /* The fence pointers are guaranteed to be valid once driver_finished is signalled */
struct pipe_fence_handle *prev_bottom_of_pipe;
struct pipe_fence_handle *top_of_pipe;
struct pipe_fence_handle *bottom_of_pipe;
struct dd_call call;
struct dd_draw_state_copy draw_state;
struct util_queue_fence driver_finished;
struct u_log_page *log_page;
};
@@ -290,38 +291,32 @@ struct dd_context
struct pipe_context base;
struct pipe_context *pipe;
struct dd_draw_state draw_state;
unsigned num_draw_calls;
struct u_log_context log;
/* Pipelined hang detection.
*
- * This is without unnecessary flushes and waits. There is a memory-based
- * fence that is incremented by clear_buffer every draw call. Driver fences
- * are not used.
+ * Before each draw call, a new dd_draw_record is created that contains
+ * a copy of all states. After each draw call, the driver's log is added
+ * to this record. Additionally, deferred fences are associated to each
+ * record both before and after the draw.
*
- * After each draw call, a new dd_draw_record is created that contains
- * a copy of all states, the output of pipe_context::dump_debug_state,
- * and it has a fence number assigned. That's done without knowing whether
- * that draw call is problematic or not. The record is added into the list
- * of all records.
- *
- * An independent, separate thread loops over the list of records and checks
- * their fences. Records with signalled fences are freed. On fence timeout,
- * the thread dumps the records of in-flight draws.
+ * The records are handed off to a separate thread which waits on the
+ * records' fences. Records with signalled fences are freed. When a timeout
+ * is detected, the thread dumps the records of in-flight draws.
*/
thrd_t thread;
mtx_t mutex;
cnd_t cond;
- struct dd_draw_record *record_pending; /* currently inside the driver */
struct list_head records; /* oldest record first */
unsigned num_records;
bool kill_thread;
bool api_stalled;
};
struct pipe_context *
dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
--
2.19.1
More information about the mesa-dev
mailing list