[Mesa-dev] [PATCH 05/10] radeon: convert software queries to the new style
Nicolai Hähnle
nhaehnle at gmail.com
Fri Nov 13 08:10:06 PST 2015
Software queries are all queries that do not require suspend/resume
and explicit handling of result buffers.
Note that this fixes a fence leak with PIPE_QUERY_GPU_FINISHED, and it
contains Marek's fix to GPU_FINISHED's end_query() handling.
---
src/gallium/drivers/radeon/r600_query.c | 366 +++++++++++++++++---------------
1 file changed, 194 insertions(+), 172 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index fdab8e3..c7350f1 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -51,15 +51,195 @@ struct r600_query {
unsigned num_cs_dw;
/* linked list of queries */
struct list_head list;
- /* for custom non-GPU queries */
+ /* For transform feedback: which stream the query is for */
+ unsigned stream;
+};
+
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+ struct r600_query b;
+
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
- /* For transform feedback: which stream the query is for */
- unsigned stream;
};
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct pipe_screen *screen = rctx->b.screen;
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ screen->fence_reference(screen, &query->fence, NULL);
+ FREE(query);
+}
+
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{
+ switch (type) {
+ case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+ case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+ case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+ case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+ case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+ case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+ case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+ case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+ case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+ case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+ default: unreachable("query type does not correspond to winsys id");
+ }
+}
+
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ switch(query->b.type) {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_GPU_FINISHED:
+ break;
+ case R600_QUERY_DRAW_CALLS:
+ query->begin_result = rctx->num_draw_calls;
+ break;
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_GPU_TEMPERATURE:
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ query->begin_result = 0;
+ break;
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_NUM_CS_FLUSHES:
+ case R600_QUERY_NUM_BYTES_MOVED: {
+ enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+ query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+ break;
+ }
+ case R600_QUERY_GPU_LOAD:
+ query->begin_result = r600_gpu_load_begin(rctx->screen);
+ break;
+ case R600_QUERY_NUM_COMPILATIONS:
+ query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ break;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ break;
+ default:
+ unreachable("r600_query_sw_begin: bad query type");
+ }
+
+ return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ switch(query->b.type) {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ rctx->b.flush(&rctx->b, &query->fence, 0);
+ break;
+ case R600_QUERY_DRAW_CALLS:
+ query->begin_result = rctx->num_draw_calls;
+ break;
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_GPU_TEMPERATURE:
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_NUM_CS_FLUSHES:
+ case R600_QUERY_NUM_BYTES_MOVED: {
+ enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+ query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+ break;
+ }
+ case R600_QUERY_GPU_LOAD:
+ query->end_result = r600_gpu_load_end(rctx->screen,
+ query->begin_result);
+ query->begin_result = 0;
+ break;
+ case R600_QUERY_NUM_COMPILATIONS:
+ query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ break;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ break;
+ default:
+ unreachable("r600_query_sw_end: bad query type");
+ }
+}
+
+static boolean r600_query_sw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ boolean wait,
+ union pipe_query_result *result)
+{
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+ switch (query->b.type) {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ /* Convert from cycles per millisecond to cycles per second (Hz). */
+ result->timestamp_disjoint.frequency =
+ (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+ result->timestamp_disjoint.disjoint = FALSE;
+ return TRUE;
+ case PIPE_QUERY_GPU_FINISHED: {
+ struct pipe_screen *screen = rctx->b.screen;
+ result->b = screen->fence_finish(screen, query->fence,
+ wait ? PIPE_TIMEOUT_INFINITE : 0);
+ return result->b;
+ }
+ }
+
+ result->u64 = query->end_result - query->begin_result;
+
+ switch (query->b.type) {
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_GPU_TEMPERATURE:
+ result->u64 /= 1000;
+ break;
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ result->u64 *= 1000000;
+ break;
+ }
+
+ return TRUE;
+}
+
+static struct r600_query_ops sw_query_ops = {
+ .destroy = r600_query_sw_destroy,
+ .begin = r600_query_sw_begin,
+ .end = r600_query_sw_end,
+ .get_result = r600_query_sw_get_result
+};
+
+static struct pipe_query *r600_query_sw_create(struct pipe_context *ctx,
+ unsigned query_type)
+{
+ struct r600_query_sw *query;
+
+ query = CALLOC_STRUCT(r600_query_sw);
+ if (query == NULL)
+ return NULL;
+
+ query->b.type = query_type;
+ query->b.ops = &sw_query_ops;
+
+ return (struct pipe_query *)query;
+}
+
static void r600_do_destroy_query(struct r600_common_context *, struct r600_query *);
static boolean r600_do_begin_query(struct r600_common_context *, struct r600_query *);
static void r600_do_end_query(struct r600_common_context *, struct r600_query *);
@@ -86,8 +266,7 @@ static bool r600_is_timer_query(unsigned type)
static bool r600_query_needs_begin(unsigned type)
{
- return type != PIPE_QUERY_GPU_FINISHED &&
- type != PIPE_QUERY_TIMESTAMP;
+ return type != PIPE_QUERY_TIMESTAMP;
}
static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
@@ -95,27 +274,6 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
unsigned j, i, num_results, buf_size = 4096;
uint32_t *results;
- /* Non-GPU queries. */
- switch (type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- case PIPE_QUERY_GPU_FINISHED:
- case R600_QUERY_DRAW_CALLS:
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_GPU_LOAD:
- case R600_QUERY_NUM_COMPILATIONS:
- case R600_QUERY_NUM_SHADERS_CREATED:
- return NULL;
- }
-
/* Queries are normally read by the CPU after
* being written by the gpu, hence staging is probably a good
* usage pattern.
@@ -377,7 +535,11 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *query;
- bool skip_allocation = false;
+
+ if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+ query_type == PIPE_QUERY_GPU_FINISHED ||
+ query_type >= PIPE_QUERY_DRIVER_SPECIFIC)
+ return r600_query_sw_create(ctx, query_type);
query = CALLOC_STRUCT(r600_query);
if (query == NULL)
@@ -414,38 +576,18 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
query->num_cs_dw = 6;
break;
- /* Non-GPU queries and queries not requiring a buffer. */
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- case PIPE_QUERY_GPU_FINISHED:
- case R600_QUERY_DRAW_CALLS:
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_GPU_LOAD:
- case R600_QUERY_NUM_COMPILATIONS:
- case R600_QUERY_NUM_SHADERS_CREATED:
- skip_allocation = true;
- break;
default:
assert(0);
FREE(query);
return NULL;
}
- if (!skip_allocation) {
- query->buffer.buf = r600_new_query_buffer(rctx, query_type);
- if (!query->buffer.buf) {
- FREE(query);
- return NULL;
- }
+ query->buffer.buf = r600_new_query_buffer(rctx, query_type);
+ if (!query->buffer.buf) {
+ FREE(query);
+ return NULL;
}
+
return (struct pipe_query*)query;
}
@@ -493,42 +635,6 @@ static boolean r600_do_begin_query(struct r600_common_context *rctx,
return false;
}
- /* Non-GPU queries. */
- switch (rquery->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- return true;
- case R600_QUERY_DRAW_CALLS:
- rquery->begin_result = rctx->num_draw_calls;
- return true;
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- rquery->begin_result = 0;
- return true;
- case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
- return true;
- case R600_QUERY_NUM_CS_FLUSHES:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
- return true;
- case R600_QUERY_NUM_BYTES_MOVED:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
- return true;
- case R600_QUERY_GPU_LOAD:
- rquery->begin_result = r600_gpu_load_begin(rctx->screen);
- return true;
- case R600_QUERY_NUM_COMPILATIONS:
- rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
- return true;
- case R600_QUERY_NUM_SHADERS_CREATED:
- rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
- return true;
- }
-
/* Discard the old query buffers. */
while (prev) {
struct r600_query_buffer *qbuf = prev;
@@ -567,57 +673,6 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
static void r600_do_end_query(struct r600_common_context *rctx,
struct r600_query *rquery)
{
- /* Non-GPU queries. */
- switch (rquery->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- return;
- case PIPE_QUERY_GPU_FINISHED:
- rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, &rquery->fence);
- return;
- case R600_QUERY_DRAW_CALLS:
- rquery->end_result = rctx->num_draw_calls;
- return;
- case R600_QUERY_REQUESTED_VRAM:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
- return;
- case R600_QUERY_REQUESTED_GTT:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
- return;
- case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
- return;
- case R600_QUERY_NUM_CS_FLUSHES:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
- return;
- case R600_QUERY_NUM_BYTES_MOVED:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
- return;
- case R600_QUERY_VRAM_USAGE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
- return;
- case R600_QUERY_GTT_USAGE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
- return;
- case R600_QUERY_GPU_TEMPERATURE:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
- return;
- case R600_QUERY_CURRENT_GPU_SCLK:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
- return;
- case R600_QUERY_CURRENT_GPU_MCLK:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
- return;
- case R600_QUERY_GPU_LOAD:
- rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
- return;
- case R600_QUERY_NUM_COMPILATIONS:
- rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
- return;
- case R600_QUERY_NUM_SHADERS_CREATED:
- rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
- return;
- }
-
r600_emit_query_end(rctx, rquery);
if (r600_query_needs_begin(rquery->type))
@@ -648,42 +703,9 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
boolean wait,
union pipe_query_result *result)
{
- struct pipe_screen *screen = ctx->b.screen;
unsigned results_base = 0;
char *map;
- /* Non-GPU queries. */
- switch (query->type) {
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* Convert from cycles per millisecond to cycles per second (Hz). */
- result->timestamp_disjoint.frequency =
- (uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
- result->timestamp_disjoint.disjoint = FALSE;
- return TRUE;
- case PIPE_QUERY_GPU_FINISHED:
- result->b = screen->fence_finish(screen, query->fence,
- wait ? PIPE_TIMEOUT_INFINITE : 0);
- return result->b;
- case R600_QUERY_DRAW_CALLS:
- case R600_QUERY_REQUESTED_VRAM:
- case R600_QUERY_REQUESTED_GTT:
- case R600_QUERY_BUFFER_WAIT_TIME:
- case R600_QUERY_NUM_CS_FLUSHES:
- case R600_QUERY_NUM_BYTES_MOVED:
- case R600_QUERY_VRAM_USAGE:
- case R600_QUERY_GTT_USAGE:
- case R600_QUERY_GPU_TEMPERATURE:
- case R600_QUERY_CURRENT_GPU_SCLK:
- case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_NUM_COMPILATIONS:
- case R600_QUERY_NUM_SHADERS_CREATED:
- result->u64 = query->end_result - query->begin_result;
- return TRUE;
- case R600_QUERY_GPU_LOAD:
- result->u64 = query->end_result;
- return TRUE;
- }
-
map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
PIPE_TRANSFER_READ |
(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
--
2.5.0
More information about the mesa-dev
mailing list