[Mesa-dev] [PATCH 23/25] radeonsi: factor si_query_buffer logic out of si_query_hw
Nicolai Hähnle
nhaehnle at gmail.com
Thu Dec 6 14:00:44 UTC 2018
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
This is a move towards using composition instead of inheritance for
different query types.
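This change somewhat weakens out-of-memory error reporting: query buffers
are now allocated lazily when a query is started or ended instead of at
query creation, so an allocation failure is no longer reported by the
create call. This should be acceptable since we didn't consistently report
such errors in the first place.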
---
src/gallium/drivers/radeonsi/si_perfcounter.c | 8 +-
src/gallium/drivers/radeonsi/si_query.c | 177 +++++++++---------
src/gallium/drivers/radeonsi/si_query.h | 17 +-
src/gallium/drivers/radeonsi/si_texture.c | 7 +-
4 files changed, 99 insertions(+), 110 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 0b3d8f89273..f0d10c054c4 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct si_screen *sscreen,
struct si_query_group *group = query->groups;
query->groups = group->next;
FREE(group);
}
FREE(query->counters);
si_query_hw_destroy(sscreen, rquery);
}
-static bool si_pc_query_prepare_buffer(struct si_screen *screen,
- struct si_query_hw *hwquery,
- struct r600_resource *buffer)
+static bool si_pc_query_prepare_buffer(struct si_context *ctx,
+ struct si_query_buffer *qbuf)
{
/* no-op */
return true;
}
static void si_pc_query_emit_start(struct si_context *sctx,
struct si_query_hw *hwquery,
struct r600_resource *buffer, uint64_t va)
{
struct si_query_pc *query = (struct si_query_pc *)hwquery;
@@ -1055,23 +1054,20 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
counter->base = group->result_base + j;
counter->stride = group->num_counters;
counter->qwords = 1;
if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
counter->qwords = screen->info.max_se;
if (group->instance < 0)
counter->qwords *= block->num_instances;
}
- if (!si_query_hw_init(screen, &query->b))
- goto error;
-
return (struct pipe_query *)query;
error:
si_pc_query_destroy(screen, &query->b.b);
return NULL;
}
static bool si_init_block_names(struct si_screen *screen,
struct si_pc_block *block)
{
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 479a1bbf2c4..5b0fba0ed92 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -514,86 +514,129 @@ static struct pipe_query *si_query_sw_create(unsigned query_type)
query = CALLOC_STRUCT(si_query_sw);
if (!query)
return NULL;
query->b.type = query_type;
query->b.ops = &sw_query_ops;
return (struct pipe_query *)query;
}
-void si_query_hw_destroy(struct si_screen *sscreen,
- struct si_query *rquery)
+void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer)
{
- struct si_query_hw *query = (struct si_query_hw *)rquery;
- struct si_query_buffer *prev = query->buffer.previous;
+ struct si_query_buffer *prev = buffer->previous;
/* Release all query buffers. */
while (prev) {
struct si_query_buffer *qbuf = prev;
prev = prev->previous;
r600_resource_reference(&qbuf->buf, NULL);
FREE(qbuf);
}
- r600_resource_reference(&query->buffer.buf, NULL);
- r600_resource_reference(&query->workaround_buf, NULL);
- FREE(rquery);
+ r600_resource_reference(&buffer->buf, NULL);
+}
+
+void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer)
+{
+ /* Discard all query buffers except for the oldest. */
+ while (buffer->previous) {
+ struct si_query_buffer *qbuf = buffer->previous;
+ buffer->previous = qbuf->previous;
+
+ r600_resource_reference(&buffer->buf, NULL);
+ buffer->buf = qbuf->buf; /* move ownership */
+ FREE(qbuf);
+ }
+ buffer->results_end = 0;
+
+ /* Discard even the oldest buffer if it can't be mapped without a stall. */
+ if (buffer->buf &&
+ (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) ||
+ !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE))) {
+ r600_resource_reference(&buffer->buf, NULL);
+ }
}
-static struct r600_resource *si_new_query_buffer(struct si_screen *sscreen,
- struct si_query_hw *query)
+bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
+ bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
+ unsigned size)
{
- unsigned buf_size = MAX2(query->result_size,
- sscreen->info.min_alloc_size);
+ if (buffer->buf && buffer->results_end + size >= buffer->buf->b.b.width0)
+ return true;
+
+ if (buffer->buf) {
+ struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
+ memcpy(qbuf, buffer, sizeof(*qbuf));
+ buffer->previous = qbuf;
+ }
+
+ buffer->results_end = 0;
/* Queries are normally read by the CPU after
* being written by the gpu, hence staging is probably a good
* usage pattern.
*/
- struct r600_resource *buf = r600_resource(
- pipe_buffer_create(&sscreen->b, 0,
- PIPE_USAGE_STAGING, buf_size));
- if (!buf)
- return NULL;
+ struct si_screen *screen = sctx->screen;
+ unsigned buf_size = MAX2(size, screen->info.min_alloc_size);
+ buffer->buf = r600_resource(
+ pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
+ if (unlikely(!buffer->buf))
+ return false;
- if (!query->ops->prepare_buffer(sscreen, query, buf)) {
- r600_resource_reference(&buf, NULL);
- return NULL;
+ if (prepare_buffer) {
+ if (unlikely(!prepare_buffer(sctx, buffer))) {
+ r600_resource_reference(&buffer->buf, NULL);
+ return false;
+ }
}
- return buf;
+ return true;
}
-static bool si_query_hw_prepare_buffer(struct si_screen *sscreen,
- struct si_query_hw *query,
- struct r600_resource *buffer)
+
+void si_query_hw_destroy(struct si_screen *sscreen,
+ struct si_query *rquery)
+{
+ struct si_query_hw *query = (struct si_query_hw *)rquery;
+
+ si_query_buffer_destroy(sscreen, &query->buffer);
+ r600_resource_reference(&query->workaround_buf, NULL);
+ FREE(rquery);
+}
+
+static bool si_query_hw_prepare_buffer(struct si_context *sctx,
+ struct si_query_buffer *qbuf)
{
- /* Callers ensure that the buffer is currently unused by the GPU. */
- uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL,
+ static const struct si_query_hw si_query_hw_s;
+ struct si_query_hw *query = container_of(qbuf, &si_query_hw_s, buffer);
+ struct si_screen *screen = sctx->screen;
+
+ /* The caller ensures that the buffer is currently unused by the GPU. */
+ uint32_t *results = screen->ws->buffer_map(qbuf->buf->buf, NULL,
PIPE_TRANSFER_WRITE |
PIPE_TRANSFER_UNSYNCHRONIZED);
if (!results)
return false;
- memset(results, 0, buffer->b.b.width0);
+ memset(results, 0, qbuf->buf->b.b.width0);
if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
- unsigned max_rbs = sscreen->info.num_render_backends;
- unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask;
+ unsigned max_rbs = screen->info.num_render_backends;
+ unsigned enabled_rb_mask = screen->info.enabled_rb_mask;
unsigned num_results;
unsigned i, j;
/* Set top bits for unused backends. */
- num_results = buffer->b.b.width0 / query->result_size;
+ num_results = qbuf->buf->b.b.width0 / query->result_size;
for (j = 0; j < num_results; j++) {
for (i = 0; i < max_rbs; i++) {
if (!(enabled_rb_mask & (1<<i))) {
results[(i * 4)+1] = 0x80000000;
results[(i * 4)+3] = 0x80000000;
}
}
results += 4 * max_rbs;
}
}
@@ -624,30 +667,20 @@ static void si_query_hw_clear_result(struct si_query_hw *,
union pipe_query_result *);
static struct si_query_hw_ops query_hw_default_hw_ops = {
.prepare_buffer = si_query_hw_prepare_buffer,
.emit_start = si_query_hw_do_emit_start,
.emit_stop = si_query_hw_do_emit_stop,
.clear_result = si_query_hw_clear_result,
.add_result = si_query_hw_add_result,
};
-bool si_query_hw_init(struct si_screen *sscreen,
- struct si_query_hw *query)
-{
- query->buffer.buf = si_new_query_buffer(sscreen, query);
- if (!query->buffer.buf)
- return false;
-
- return true;
-}
-
static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
unsigned query_type,
unsigned index)
{
struct si_query_hw *query = CALLOC_STRUCT(si_query_hw);
if (!query)
return NULL;
query->b.type = query_type;
query->b.ops = &query_hw_ops;
@@ -693,25 +726,20 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
query->result_size = 11 * 16;
query->result_size += 8; /* for the fence + alignment */
query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
break;
default:
assert(0);
FREE(query);
return NULL;
}
- if (!si_query_hw_init(sscreen, query)) {
- FREE(query);
- return NULL;
- }
-
return (struct pipe_query *)query;
}
static void si_update_occlusion_query_state(struct si_context *sctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
type == PIPE_QUERY_OCCLUSION_PREDICATE ||
type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
bool old_enable = sctx->num_occlusion_queries != 0;
@@ -802,43 +830,31 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
}
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_QUERY);
}
static void si_query_hw_emit_start(struct si_context *sctx,
struct si_query_hw *query)
{
uint64_t va;
- if (!query->buffer.buf)
- return; // previous buffer allocation failure
+ if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
+ query->result_size))
+ return;
si_update_occlusion_query_state(sctx, query->b.type, 1);
si_update_prims_generated_query_state(sctx, query->b.type, 1);
if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA)
si_need_gfx_cs_space(sctx);
- /* Get a new query buffer if needed. */
- if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
- struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
- *qbuf = query->buffer;
- query->buffer.results_end = 0;
- query->buffer.previous = qbuf;
- query->buffer.buf = si_new_query_buffer(sctx->screen, query);
- if (!query->buffer.buf)
- return;
- }
-
- /* emit begin query */
va = query->buffer.buf->gpu_address + query->buffer.results_end;
-
query->ops->emit_start(sctx, query, query->buffer.buf, va);
}
static void si_query_hw_do_emit_stop(struct si_context *sctx,
struct si_query_hw *query,
struct r600_resource *buffer,
uint64_t va)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
uint64_t fence_va = 0;
@@ -905,26 +921,30 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
query->buffer.buf, fence_va, 0x80000000,
query->b.type);
}
}
static void si_query_hw_emit_stop(struct si_context *sctx,
struct si_query_hw *query)
{
uint64_t va;
- if (!query->buffer.buf)
- return; // previous buffer allocation failure
-
/* The queries which need begin already called this in begin_query. */
- if (query->flags & SI_QUERY_HW_FLAG_NO_START)
+ if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
si_need_gfx_cs_space(sctx);
+ if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
+ query->result_size))
+ return;
+ }
+
+ if (!query->buffer.buf)
+ return; // previous buffer allocation failure
/* emit end query */
va = query->buffer.buf->gpu_address + query->buffer.results_end;
query->ops->emit_stop(sctx, query, query->buffer.buf, va);
query->buffer.results_end += query->result_size;
si_update_occlusion_query_state(sctx, query->b.type, -1);
si_update_prims_generated_query_state(sctx, query->b.type, -1);
@@ -1054,59 +1074,32 @@ static void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
static boolean si_begin_query(struct pipe_context *ctx,
struct pipe_query *query)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_query *rquery = (struct si_query *)query;
return rquery->ops->begin(sctx, rquery);
}
-void si_query_hw_reset_buffers(struct si_context *sctx,
- struct si_query_hw *query)
-{
- struct si_query_buffer *prev = query->buffer.previous;
-
- /* Discard the old query buffers. */
- while (prev) {
- struct si_query_buffer *qbuf = prev;
- prev = prev->previous;
- r600_resource_reference(&qbuf->buf, NULL);
- FREE(qbuf);
- }
-
- query->buffer.results_end = 0;
- query->buffer.previous = NULL;
-
- /* Obtain a new buffer if the current one can't be mapped without a stall. */
- if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
- !sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
- r600_resource_reference(&query->buffer.buf, NULL);
- query->buffer.buf = si_new_query_buffer(sctx->screen, query);
- } else {
- if (!query->ops->prepare_buffer(sctx->screen, query, query->buffer.buf))
- r600_resource_reference(&query->buffer.buf, NULL);
- }
-}
-
bool si_query_hw_begin(struct si_context *sctx,
struct si_query *rquery)
{
struct si_query_hw *query = (struct si_query_hw *)rquery;
if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
assert(0);
return false;
}
if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
- si_query_hw_reset_buffers(sctx, query);
+ si_query_buffer_reset(sctx, &query->buffer);
r600_resource_reference(&query->workaround_buf, NULL);
si_query_hw_emit_start(sctx, query);
if (!query->buffer.buf)
return false;
LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries);
sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
return true;
@@ -1119,21 +1112,21 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
return rquery->ops->end(sctx, rquery);
}
bool si_query_hw_end(struct si_context *sctx,
struct si_query *rquery)
{
struct si_query_hw *query = (struct si_query_hw *)rquery;
if (query->flags & SI_QUERY_HW_FLAG_NO_START)
- si_query_hw_reset_buffers(sctx, query);
+ si_query_buffer_reset(sctx, &query->buffer);
si_query_hw_emit_stop(sctx, query);
if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
LIST_DELINIT(&query->b.active_list);
sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
}
if (!query->buffer.buf)
return false;
diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h
index ebd965a004f..63af760a271 100644
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -27,20 +27,21 @@
#include "util/u_threaded_context.h"
struct pipe_context;
struct pipe_query;
struct pipe_resource;
struct si_screen;
struct si_context;
struct si_query;
+struct si_query_buffer;
struct si_query_hw;
struct r600_resource;
enum {
SI_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
SI_QUERY_DECOMPRESS_CALLS,
SI_QUERY_MRT_DRAW_CALLS,
SI_QUERY_PRIM_RESTART_CALLS,
SI_QUERY_SPILL_DRAW_CALLS,
SI_QUERY_COMPUTE_CALLS,
@@ -153,23 +154,21 @@ struct si_query {
};
enum {
SI_QUERY_HW_FLAG_NO_START = (1 << 0),
/* gap */
/* whether begin_query doesn't clear the result */
SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
};
struct si_query_hw_ops {
- bool (*prepare_buffer)(struct si_screen *,
- struct si_query_hw *,
- struct r600_resource *);
+ bool (*prepare_buffer)(struct si_context *, struct si_query_buffer *);
void (*emit_start)(struct si_context *,
struct si_query_hw *,
struct r600_resource *buffer, uint64_t va);
void (*emit_stop)(struct si_context *,
struct si_query_hw *,
struct r600_resource *buffer, uint64_t va);
void (*clear_result)(struct si_query_hw *, union pipe_query_result *);
void (*add_result)(struct si_screen *screen,
struct si_query_hw *, void *buffer,
union pipe_query_result *result);
@@ -179,40 +178,45 @@ struct si_query_buffer {
/* The buffer where query results are stored. */
struct r600_resource *buf;
/* Offset of the next free result after current query data */
unsigned results_end;
/* If a query buffer is full, a new buffer is created and the old one
* is put in here. When we calculate the result, we sum up the samples
* from all buffers. */
struct si_query_buffer *previous;
};
+void si_query_buffer_destroy(struct si_screen *sctx, struct si_query_buffer *buffer);
+void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer);
+bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
+ bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
+ unsigned size);
+
+
struct si_query_hw {
struct si_query b;
struct si_query_hw_ops *ops;
unsigned flags;
/* The query buffer and how many results are in it. */
struct si_query_buffer buffer;
/* Size of the result in memory for both begin_query and end_query,
* this can be one or two numbers, or it could even be a size of a structure. */
unsigned result_size;
/* For transform feedback: which stream the query is for */
unsigned stream;
/* Workaround via compute shader */
struct r600_resource *workaround_buf;
unsigned workaround_offset;
};
-bool si_query_hw_init(struct si_screen *sscreen,
- struct si_query_hw *query);
void si_query_hw_destroy(struct si_screen *sscreen,
struct si_query *rquery);
bool si_query_hw_begin(struct si_context *sctx,
struct si_query *rquery);
bool si_query_hw_end(struct si_context *sctx,
struct si_query *rquery);
bool si_query_hw_get_result(struct si_context *sctx,
struct si_query *rquery,
bool wait,
union pipe_query_result *result);
@@ -237,20 +241,17 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types);
int si_get_perfcounter_info(struct si_screen *,
unsigned index,
struct pipe_driver_query_info *info);
int si_get_perfcounter_group_info(struct si_screen *,
unsigned index,
struct pipe_driver_query_group_info *info);
-void si_query_hw_reset_buffers(struct si_context *sctx,
- struct si_query_hw *query);
-
struct si_qbo_state {
void *saved_compute;
struct pipe_constant_buffer saved_const0;
struct pipe_shader_buffer saved_ssbo[3];
};
#endif /* SI_QUERY_H */
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index ac1a0aa6097..9df12e0f5bd 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -2276,25 +2276,24 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
struct si_context *sctx = (struct si_context*)ctx;
struct pipe_query *tmp;
unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
bool query_active = sctx->dcc_stats[i].query_active;
bool disable = false;
if (sctx->dcc_stats[i].ps_stats[2]) {
union pipe_query_result result;
/* Read the results. */
- ctx->get_query_result(ctx, sctx->dcc_stats[i].ps_stats[2],
+ struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2];
+ ctx->get_query_result(ctx, query,
true, &result);
- si_query_hw_reset_buffers(sctx,
- (struct si_query_hw*)
- sctx->dcc_stats[i].ps_stats[2]);
+ si_query_buffer_reset(sctx, &((struct si_query_hw*)query)->buffer);
/* Compute the approximate number of fullscreen draws. */
tex->ps_draw_ratio =
result.pipeline_statistics.ps_invocations /
(tex->buffer.b.b.width0 * tex->buffer.b.b.height0);
sctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
disable = tex->dcc_separate_buffer &&
!vi_should_enable_separate_dcc(tex);
}
--
2.19.1
More information about the mesa-dev mailing list